code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable-msg=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay to many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import platform
  36 import logging
  37 import copy
  38 import OpenSSL
  39 import socket
  40 import tempfile
  41 import shutil
  42
  43 from ganeti import ssh
  44 from ganeti import utils
  45 from ganeti import errors
  46 from ganeti import hypervisor
  47 from ganeti import locking
  48 from ganeti import constants
  49 from ganeti import objects
  50 from ganeti import serializer
  51 from ganeti import ssconf
  52 from ganeti import uidpool
  53 from ganeti import compat
  54 from ganeti import masterd
  55 from ganeti import netutils
  56 from ganeti import ht
  57
  58 import ganeti.masterd.instance # pylint: disable-msg=W0611
  59
  60 # Common opcode attributes
  61
  62 #: output fields for a query operation
  63 _POutputFields = ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString))
  64
  65
  66 #: the shutdown timeout
  67 _PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
  68                      ht.TPositiveInt)
  69
  70 #: the force parameter
  71 _PForce = ("force", False, ht.TBool)
  72
  73 #: a required instance name (for single-instance LUs)
  74 _PInstanceName = ("instance_name", ht.NoDefault, ht.TNonEmptyString)
  75
  76 #: Whether to ignore offline nodes
  77 _PIgnoreOfflineNodes = ("ignore_offline_nodes", False, ht.TBool)
  78
  79 #: a required node name (for single-node LUs)
  80 _PNodeName = ("node_name", ht.NoDefault, ht.TNonEmptyString)
  81
  82 #: the migration type (live/non-live)
  83 _PMigrationMode = ("mode", None,
  84                    ht.TOr(ht.TNone, ht.TElemOf(constants.HT_MIGRATION_MODES)))
  85
  86 #: the obsolete 'live' mode (boolean)
  87 _PMigrationLive = ("live", None, ht.TMaybeBool)
  88
  89
  90 # End types
  91 class LogicalUnit(object):
  92   """Logical Unit base class.
  93
  94   Subclasses must follow these rules:
  95     - implement ExpandNames
  96     - implement CheckPrereq (except when tasklets are used)
  97     - implement Exec (except when tasklets are used)
  98     - implement BuildHooksEnv
  99     - redefine HPATH and HTYPE
 100     - optionally redefine their run requirements:
 101         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
 102
 103   Note that all commands require root permissions.
 104
 105   @ivar dry_run_result: the value (if any) that will be returned to the caller
 106       in dry-run mode (signalled by opcode dry_run parameter)
 107   @cvar _OP_PARAMS: a list of opcode attributes, their defaults values
 108       they should get if not already defined, and types they must match
 109
 110   """
 111   HPATH = None
 112   HTYPE = None
 113   _OP_PARAMS = []
 114   REQ_BGL = True
 115
 116   def __init__(self, processor, op, context, rpc):
 117     """Constructor for LogicalUnit.
 118
 119     This needs to be overridden in derived classes in order to check op
 120     validity.
 121
 122     """
 123     self.proc = processor
 124     self.op = op
 125     self.cfg = context.cfg
 126     self.context = context
 127     self.rpc = rpc
 128     # Dicts used to declare locking needs to mcpu
 129     self.needed_locks = None
 130     self.acquired_locks = {}
 131     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 132     self.add_locks = {}
 133     self.remove_locks = {}
 134     # Used to force good behavior when calling helper functions
 135     self.recalculate_locks = {}
 136     self.__ssh = None
 137     # logging
 138     self.Log = processor.Log # pylint: disable-msg=C0103
 139     self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
 140     self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
 141     self.LogStep = processor.LogStep # pylint: disable-msg=C0103
 142     # support for dry-run
 143     self.dry_run_result = None
 144     # support for generic debug attribute
 145     if (not hasattr(self.op, "debug_level") or
 146         not isinstance(self.op.debug_level, int)):
 147       self.op.debug_level = 0
 148
 149     # Tasklets
 150     self.tasklets = None
 151
 152     # The new kind-of-type-system
 153     op_id = self.op.OP_ID
 154     for attr_name, aval, test in self._OP_PARAMS:
 155       if not hasattr(op, attr_name):
 156         if aval == ht.NoDefault:
 157           raise errors.OpPrereqError("Required parameter '%s.%s' missing" %
 158                                      (op_id, attr_name), errors.ECODE_INVAL)
 159         else:
 160           if callable(aval):
 161             dval = aval()
 162           else:
 163             dval = aval
 164           setattr(self.op, attr_name, dval)
 165       attr_val = getattr(op, attr_name)
 166       if test == ht.NoType:
 167         # no tests here
 168         continue
 169       if not callable(test):
 170         raise errors.ProgrammerError("Validation for parameter '%s.%s' failed,"
 171                                      " given type is not a proper type (%s)" %
 172                                      (op_id, attr_name, test))
 173       if not test(attr_val):
 174         logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
 175                       self.op.OP_ID, attr_name, type(attr_val), attr_val)
 176         raise errors.OpPrereqError("Parameter '%s.%s' fails validation" %
 177                                    (op_id, attr_name), errors.ECODE_INVAL)
 178
 179     self.CheckArguments()
 180
 181   def __GetSSH(self):
 182     """Returns the SshRunner object
 183
 184     """
 185     if not self.__ssh:
 186       self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
 187     return self.__ssh
 188
 189   ssh = property(fget=__GetSSH)
 190
 191   def CheckArguments(self):
 192     """Check syntactic validity for the opcode arguments.
 193
 194     This method is for doing a simple syntactic check and ensure
 195     validity of opcode parameters, without any cluster-related
 196     checks. While the same can be accomplished in ExpandNames and/or
 197     CheckPrereq, doing these separate is better because:
 198
 199       - ExpandNames is left as as purely a lock-related function
 200       - CheckPrereq is run after we have acquired locks (and possible
 201         waited for them)
 202
 203     The function is allowed to change the self.op attribute so that
 204     later methods can no longer worry about missing parameters.
 205
 206     """
 207     pass
 208
 209   def ExpandNames(self):
 210     """Expand names for this LU.
 211
 212     This method is called before starting to execute the opcode, and it should
 213     update all the parameters of the opcode to their canonical form (e.g. a
 214     short node name must be fully expanded after this method has successfully
 215     completed). This way locking, hooks, logging, ecc. can work correctly.
 216
 217     LUs which implement this method must also populate the self.needed_locks
 218     member, as a dict with lock levels as keys, and a list of needed lock names
 219     as values. Rules:
 220
 221       - use an empty dict if you don't need any lock
 222       - if you don't need any lock at a particular level omit that level
 223       - don't put anything for the BGL level
 224       - if you want all locks at a level use locking.ALL_SET as a value
 225
 226     If you need to share locks (rather than acquire them exclusively) at one
 227     level you can modify self.share_locks, setting a true value (usually 1) for
 228     that level. By default locks are not shared.
 229
 230     This function can also define a list of tasklets, which then will be
 231     executed in order instead of the usual LU-level CheckPrereq and Exec
 232     functions, if those are not defined by the LU.
 233
 234     Examples::
 235
 236       # Acquire all nodes and one instance
 237       self.needed_locks = {
 238         locking.LEVEL_NODE: locking.ALL_SET,
 239         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 240       }
 241       # Acquire just two nodes
 242       self.needed_locks = {
 243         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 244       }
 245       # Acquire no locks
 246       self.needed_locks = {} # No, you can't leave it to the default value None
 247
 248     """
 249     # The implementation of this method is mandatory only if the new LU is
 250     # concurrent, so that old LUs don't need to be changed all at the same
 251     # time.
 252     if self.REQ_BGL:
 253       self.needed_locks = {} # Exclusive LUs don't need locks.
 254     else:
 255       raise NotImplementedError
 256
 257   def DeclareLocks(self, level):
 258     """Declare LU locking needs for a level
 259
 260     While most LUs can just declare their locking needs at ExpandNames time,
 261     sometimes there's the need to calculate some locks after having acquired
 262     the ones before. This function is called just before acquiring locks at a
 263     particular level, but after acquiring the ones at lower levels, and permits
 264     such calculations. It can be used to modify self.needed_locks, and by
 265     default it does nothing.
 266
 267     This function is only called if you have something already set in
 268     self.needed_locks for the level.
 269
 270     @param level: Locking level which is going to be locked
 271     @type level: member of ganeti.locking.LEVELS
 272
 273     """
 274
 275   def CheckPrereq(self):
 276     """Check prerequisites for this LU.
 277
 278     This method should check that the prerequisites for the execution
 279     of this LU are fulfilled. It can do internode communication, but
 280     it should be idempotent - no cluster or system changes are
 281     allowed.
 282
 283     The method should raise errors.OpPrereqError in case something is
 284     not fulfilled. Its return value is ignored.
 285
 286     This method should also update all the parameters of the opcode to
 287     their canonical form if it hasn't been done by ExpandNames before.
 288
 289     """
 290     if self.tasklets is not None:
 291       for (idx, tl) in enumerate(self.tasklets):
 292         logging.debug("Checking prerequisites for tasklet %s/%s",
 293                       idx + 1, len(self.tasklets))
 294         tl.CheckPrereq()
 295     else:
 296       pass
 297
 298   def Exec(self, feedback_fn):
 299     """Execute the LU.
 300
 301     This method should implement the actual work. It should raise
 302     errors.OpExecError for failures that are somewhat dealt with in
 303     code, or expected.
 304
 305     """
 306     if self.tasklets is not None:
 307       for (idx, tl) in enumerate(self.tasklets):
 308         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 309         tl.Exec(feedback_fn)
 310     else:
 311       raise NotImplementedError
 312
 313   def BuildHooksEnv(self):
 314     """Build hooks environment for this LU.
 315
 316     This method should return a three-node tuple consisting of: a dict
 317     containing the environment that will be used for running the
 318     specific hook for this LU, a list of node names on which the hook
 319     should run before the execution, and a list of node names on which
 320     the hook should run after the execution.
 321
 322     The keys of the dict must not have 'GANETI_' prefixed as this will
 323     be handled in the hooks runner. Also note additional keys will be
 324     added by the hooks runner. If the LU doesn't define any
 325     environment, an empty dict (and not None) should be returned.
 326
 327     No nodes should be returned as an empty list (and not None).
 328
 329     Note that if the HPATH for a LU class is None, this function will
 330     not be called.
 331
 332     """
 333     raise NotImplementedError
 334
 335   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 336     """Notify the LU about the results of its hooks.
 337
 338     This method is called every time a hooks phase is executed, and notifies
 339     the Logical Unit about the hooks' result. The LU can then use it to alter
 340     its result based on the hooks.  By default the method does nothing and the
 341     previous result is passed back unchanged but any LU can define it if it
 342     wants to use the local cluster hook-scripts somehow.
 343
 344     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 345         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 346     @param hook_results: the results of the multi-node hooks rpc call
 347     @param feedback_fn: function used send feedback back to the caller
 348     @param lu_result: the previous Exec result this LU had, or None
 349         in the PRE phase
 350     @return: the new Exec result, based on the previous result
 351         and hook results
 352
 353     """
 354     # API must be kept, thus we ignore the unused argument and could
 355     # be a function warnings
 356     # pylint: disable-msg=W0613,R0201
 357     return lu_result
 358
 359   def _ExpandAndLockInstance(self):
 360     """Helper function to expand and lock an instance.
 361
 362     Many LUs that work on an instance take its name in self.op.instance_name
 363     and need to expand it and then declare the expanded name for locking. This
 364     function does it, and then updates self.op.instance_name to the expanded
 365     name. It also initializes needed_locks as a dict, if this hasn't been done
 366     before.
 367
 368     """
 369     if self.needed_locks is None:
 370       self.needed_locks = {}
 371     else:
 372       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 373         "_ExpandAndLockInstance called with instance-level locks set"
 374     self.op.instance_name = _ExpandInstanceName(self.cfg,
 375                                                 self.op.instance_name)
 376     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 377
 378   def _LockInstancesNodes(self, primary_only=False):
 379     """Helper function to declare instances' nodes for locking.
 380
 381     This function should be called after locking one or more instances to lock
 382     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 383     with all primary or secondary nodes for instances already locked and
 384     present in self.needed_locks[locking.LEVEL_INSTANCE].
 385
 386     It should be called from DeclareLocks, and for safety only works if
 387     self.recalculate_locks[locking.LEVEL_NODE] is set.
 388
 389     In the future it may grow parameters to just lock some instance's nodes, or
 390     to just lock primaries or secondary nodes, if needed.
 391
 392     If should be called in DeclareLocks in a way similar to::
 393
 394       if level == locking.LEVEL_NODE:
 395         self._LockInstancesNodes()
 396
 397     @type primary_only: boolean
 398     @param primary_only: only lock primary nodes of locked instances
 399
 400     """
 401     assert locking.LEVEL_NODE in self.recalculate_locks, \
 402       "_LockInstancesNodes helper function called with no nodes to recalculate"
 403
 404     # TODO: check if we're really been called with the instance locks held
 405
 406     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 407     # future we might want to have different behaviors depending on the value
 408     # of self.recalculate_locks[locking.LEVEL_NODE]
 409     wanted_nodes = []
 410     for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
 411       instance = self.context.cfg.GetInstanceInfo(instance_name)
 412       wanted_nodes.append(instance.primary_node)
 413       if not primary_only:
 414         wanted_nodes.extend(instance.secondary_nodes)
 415
 416     if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
 417       self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
 418     elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
 419       self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
 420
 421     del self.recalculate_locks[locking.LEVEL_NODE]
 422
 423
 424 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
 425   """Simple LU which runs no hooks.
 426
 427   This LU is intended as a parent for other LogicalUnits which will
 428   run no hooks, in order to reduce duplicate code.
 429
 430   """
 431   HPATH = None
 432   HTYPE = None
 433
 434   def BuildHooksEnv(self):
 435     """Empty BuildHooksEnv for NoHooksLu.
 436
 437     This just raises an error.
 438
 439     """
 440     assert False, "BuildHooksEnv called for NoHooksLUs"
 441
 442
 443 class Tasklet:
 444   """Tasklet base class.
 445
 446   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 447   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 448   tasklets know nothing about locks.
 449
 450   Subclasses must follow these rules:
 451     - Implement CheckPrereq
 452     - Implement Exec
 453
 454   """
 455   def __init__(self, lu):
 456     self.lu = lu
 457
 458     # Shortcuts
 459     self.cfg = lu.cfg
 460     self.rpc = lu.rpc
 461
 462   def CheckPrereq(self):
 463     """Check prerequisites for this tasklets.
 464
 465     This method should check whether the prerequisites for the execution of
 466     this tasklet are fulfilled. It can do internode communication, but it
 467     should be idempotent - no cluster or system changes are allowed.
 468
 469     The method should raise errors.OpPrereqError in case something is not
 470     fulfilled. Its return value is ignored.
 471
 472     This method should also update all parameters to their canonical form if it
 473     hasn't been done before.
 474
 475     """
 476     pass
 477
 478   def Exec(self, feedback_fn):
 479     """Execute the tasklet.
 480
 481     This method should implement the actual work. It should raise
 482     errors.OpExecError for failures that are somewhat dealt with in code, or
 483     expected.
 484
 485     """
 486     raise NotImplementedError
 487
 488
 489 def _GetWantedNodes(lu, nodes):
 490   """Returns list of checked and expanded node names.
 491
 492   @type lu: L{LogicalUnit}
 493   @param lu: the logical unit on whose behalf we execute
 494   @type nodes: list
 495   @param nodes: list of node names or None for all nodes
 496   @rtype: list
 497   @return: the list of nodes, sorted
 498   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 499
 500   """
 501   if not nodes:
 502     raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
 503       " non-empty list of nodes whose name is to be expanded.")
 504
 505   wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
 506   return utils.NiceSort(wanted)
 507
 508
 509 def _GetWantedInstances(lu, instances):
 510   """Returns list of checked and expanded instance names.
 511
 512   @type lu: L{LogicalUnit}
 513   @param lu: the logical unit on whose behalf we execute
 514   @type instances: list
 515   @param instances: list of instance names or None for all instances
 516   @rtype: list
 517   @return: the list of instances, sorted
 518   @raise errors.OpPrereqError: if the instances parameter is wrong type
 519   @raise errors.OpPrereqError: if any of the passed instances is not found
 520
 521   """
 522   if instances:
 523     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 524   else:
 525     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 526   return wanted
 527
 528
 529 def _GetUpdatedParams(old_params, update_dict,
 530                       use_default=True, use_none=False):
 531   """Return the new version of a parameter dictionary.
 532
 533   @type old_params: dict
 534   @param old_params: old parameters
 535   @type update_dict: dict
 536   @param update_dict: dict containing new parameter values, or
 537       constants.VALUE_DEFAULT to reset the parameter to its default
 538       value
 539   @param use_default: boolean
 540   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 541       values as 'to be deleted' values
 542   @param use_none: boolean
 543   @type use_none: whether to recognise C{None} values as 'to be
 544       deleted' values
 545   @rtype: dict
 546   @return: the new parameter dictionary
 547
 548   """
 549   params_copy = copy.deepcopy(old_params)
 550   for key, val in update_dict.iteritems():
 551     if ((use_default and val == constants.VALUE_DEFAULT) or
 552         (use_none and val is None)):
 553       try:
 554         del params_copy[key]
 555       except KeyError:
 556         pass
 557     else:
 558       params_copy[key] = val
 559   return params_copy
 560
 561
 562 def _CheckOutputFields(static, dynamic, selected):
 563   """Checks whether all selected fields are valid.
 564
 565   @type static: L{utils.FieldSet}
 566   @param static: static fields set
 567   @type dynamic: L{utils.FieldSet}
 568   @param dynamic: dynamic fields set
 569
 570   """
 571   f = utils.FieldSet()
 572   f.Extend(static)
 573   f.Extend(dynamic)
 574
 575   delta = f.NonMatching(selected)
 576   if delta:
 577     raise errors.OpPrereqError("Unknown output fields selected: %s"
 578                                % ",".join(delta), errors.ECODE_INVAL)
 579
 580
 581 def _CheckGlobalHvParams(params):
 582   """Validates that given hypervisor params are not global ones.
 583
 584   This will ensure that instances don't get customised versions of
 585   global params.
 586
 587   """
 588   used_globals = constants.HVC_GLOBALS.intersection(params)
 589   if used_globals:
 590     msg = ("The following hypervisor parameters are global and cannot"
 591            " be customized at instance level, please modify them at"
 592            " cluster level: %s" % utils.CommaJoin(used_globals))
 593     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 594
 595
 596 def _CheckNodeOnline(lu, node, msg=None):
 597   """Ensure that a given node is online.
 598
 599   @param lu: the LU on behalf of which we make the check
 600   @param node: the node to check
 601   @param msg: if passed, should be a message to replace the default one
 602   @raise errors.OpPrereqError: if the node is offline
 603
 604   """
 605   if msg is None:
 606     msg = "Can't use offline node"
 607   if lu.cfg.GetNodeInfo(node).offline:
 608     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
 609
 610
 611 def _CheckNodeNotDrained(lu, node):
 612   """Ensure that a given node is not drained.
 613
 614   @param lu: the LU on behalf of which we make the check
 615   @param node: the node to check
 616   @raise errors.OpPrereqError: if the node is drained
 617
 618   """
 619   if lu.cfg.GetNodeInfo(node).drained:
 620     raise errors.OpPrereqError("Can't use drained node %s" % node,
 621                                errors.ECODE_STATE)
 622
 623
 624 def _CheckNodeVmCapable(lu, node):
 625   """Ensure that a given node is vm capable.
 626
 627   @param lu: the LU on behalf of which we make the check
 628   @param node: the node to check
 629   @raise errors.OpPrereqError: if the node is not vm capable
 630
 631   """
 632   if not lu.cfg.GetNodeInfo(node).vm_capable:
 633     raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
 634                                errors.ECODE_STATE)
 635
 636
 637 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 638   """Ensure that a node supports a given OS.
 639
 640   @param lu: the LU on behalf of which we make the check
 641   @param node: the node to check
 642   @param os_name: the OS to query about
 643   @param force_variant: whether to ignore variant errors
 644   @raise errors.OpPrereqError: if the node is not supporting the OS
 645
 646   """
 647   result = lu.rpc.call_os_get(node, os_name)
 648   result.Raise("OS '%s' not in supported OS list for node %s" %
 649                (os_name, node),
 650                prereq=True, ecode=errors.ECODE_INVAL)
 651   if not force_variant:
 652     _CheckOSVariant(result.payload, os_name)
 653
 654
 655 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
 656   """Ensure that a node has the given secondary ip.
 657
 658   @type lu: L{LogicalUnit}
 659   @param lu: the LU on behalf of which we make the check
 660   @type node: string
 661   @param node: the node to check
 662   @type secondary_ip: string
 663   @param secondary_ip: the ip to check
 664   @type prereq: boolean
 665   @param prereq: whether to throw a prerequisite or an execute error
 666   @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
 667   @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
 668
 669   """
 670   result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
 671   result.Raise("Failure checking secondary ip on node %s" % node,
 672                prereq=prereq, ecode=errors.ECODE_ENVIRON)
 673   if not result.payload:
 674     msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
 675            " please fix and re-run this command" % secondary_ip)
 676     if prereq:
 677       raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
 678     else:
 679       raise errors.OpExecError(msg)
 680
 681
 682 def _RequireFileStorage():
 683   """Checks that file storage is enabled.
 684
 685   @raise errors.OpPrereqError: when file storage is disabled
 686
 687   """
 688   if not constants.ENABLE_FILE_STORAGE:
 689     raise errors.OpPrereqError("File storage disabled at configure time",
 690                                errors.ECODE_INVAL)
 691
 692
 693 def _CheckDiskTemplate(template):
 694   """Ensure a given disk template is valid.
 695
 696   """
 697   if template not in constants.DISK_TEMPLATES:
 698     msg = ("Invalid disk template name '%s', valid templates are: %s" %
 699            (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
 700     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 701   if template == constants.DT_FILE:
 702     _RequireFileStorage()
 703   return True
 704
 705
 706 def _CheckStorageType(storage_type):
 707   """Ensure a given storage type is valid.
 708
 709   """
 710   if storage_type not in constants.VALID_STORAGE_TYPES:
 711     raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
 712                                errors.ECODE_INVAL)
 713   if storage_type == constants.ST_FILE:
 714     _RequireFileStorage()
 715   return True
 716
 717
 718 def _GetClusterDomainSecret():
 719   """Reads the cluster domain secret.
 720
 721   """
 722   return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
 723                                strict=True)
 724
 725
 726 def _CheckInstanceDown(lu, instance, reason):
 727   """Ensure that an instance is not running."""
 728   if instance.admin_up:
 729     raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
 730                                (instance.name, reason), errors.ECODE_STATE)
 731
 732   pnode = instance.primary_node
 733   ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
 734   ins_l.Raise("Can't contact node %s for instance information" % pnode,
 735               prereq=True, ecode=errors.ECODE_ENVIRON)
 736
 737   if instance.name in ins_l.payload:
 738     raise errors.OpPrereqError("Instance %s is running, %s" %
 739                                (instance.name, reason), errors.ECODE_STATE)
 740
 741
 742 def _ExpandItemName(fn, name, kind):
 743   """Expand an item name.
 744
 745   @param fn: the function to use for expansion
 746   @param name: requested item name
 747   @param kind: text description ('Node' or 'Instance')
 748   @return: the resolved (full) name
 749   @raise errors.OpPrereqError: if the item is not found
 750
 751   """
 752   full_name = fn(name)
 753   if full_name is None:
 754     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
 755                                errors.ECODE_NOENT)
 756   return full_name
 757
 758
 759 def _ExpandNodeName(cfg, name):
 760   """Wrapper over L{_ExpandItemName} for nodes."""
 761   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
 762
 763
 764 def _ExpandInstanceName(cfg, name):
 765   """Wrapper over L{_ExpandItemName} for instance."""
 766   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
 767
 768
 769 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 770                           memory, vcpus, nics, disk_template, disks,
 771                           bep, hvp, hypervisor_name):
 772   """Builds instance related env variables for hooks
 773
 774   This builds the hook environment from individual variables.
 775
 776   @type name: string
 777   @param name: the name of the instance
 778   @type primary_node: string
 779   @param primary_node: the name of the instance's primary node
 780   @type secondary_nodes: list
 781   @param secondary_nodes: list of secondary nodes as strings
 782   @type os_type: string
 783   @param os_type: the name of the instance's OS
 784   @type status: boolean
 785   @param status: the should_run status of the instance
 786   @type memory: string
 787   @param memory: the memory size of the instance
 788   @type vcpus: string
 789   @param vcpus: the count of VCPUs the instance has
 790   @type nics: list
 791   @param nics: list of tuples (ip, mac, mode, link) representing
 792       the NICs the instance has
 793   @type disk_template: string
 794   @param disk_template: the disk template of the instance
 795   @type disks: list
 796   @param disks: the list of (size, mode) pairs
 797   @type bep: dict
 798   @param bep: the backend parameters for the instance
 799   @type hvp: dict
 800   @param hvp: the hypervisor parameters for the instance
 801   @type hypervisor_name: string
 802   @param hypervisor_name: the hypervisor for the instance
 803   @rtype: dict
 804   @return: the hook environment for this instance
 805
 806   """
 807   if status:
 808     str_status = "up"
 809   else:
 810     str_status = "down"
 811   env = {
 812     "OP_TARGET": name,
 813     "INSTANCE_NAME": name,
 814     "INSTANCE_PRIMARY": primary_node,
 815     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
 816     "INSTANCE_OS_TYPE": os_type,
 817     "INSTANCE_STATUS": str_status,
 818     "INSTANCE_MEMORY": memory,
 819     "INSTANCE_VCPUS": vcpus,
 820     "INSTANCE_DISK_TEMPLATE": disk_template,
 821     "INSTANCE_HYPERVISOR": hypervisor_name,
 822   }
 823
 824   if nics:
 825     nic_count = len(nics)
 826     for idx, (ip, mac, mode, link) in enumerate(nics):
 827       if ip is None:
 828         ip = ""
 829       env["INSTANCE_NIC%d_IP" % idx] = ip
 830       env["INSTANCE_NIC%d_MAC" % idx] = mac
 831       env["INSTANCE_NIC%d_MODE" % idx] = mode
 832       env["INSTANCE_NIC%d_LINK" % idx] = link
 833       if mode == constants.NIC_MODE_BRIDGED:
 834         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
 835   else:
 836     nic_count = 0
 837
 838   env["INSTANCE_NIC_COUNT"] = nic_count
 839
 840   if disks:
 841     disk_count = len(disks)
 842     for idx, (size, mode) in enumerate(disks):
 843       env["INSTANCE_DISK%d_SIZE" % idx] = size
 844       env["INSTANCE_DISK%d_MODE" % idx] = mode
 845   else:
 846     disk_count = 0
 847
 848   env["INSTANCE_DISK_COUNT"] = disk_count
 849
 850   for source, kind in [(bep, "BE"), (hvp, "HV")]:
 851     for key, value in source.items():
 852       env["INSTANCE_%s_%s" % (kind, key)] = value
 853
 854   return env
 855
 856
 857 def _NICListToTuple(lu, nics):
 858   """Build a list of nic information tuples.
 859
 860   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
 861   value in LUQueryInstanceData.
 862
 863   @type lu:  L{LogicalUnit}
 864   @param lu: the logical unit on whose behalf we execute
 865   @type nics: list of L{objects.NIC}
 866   @param nics: list of nics to convert to hooks tuples
 867
 868   """
 869   hooks_nics = []
 870   cluster = lu.cfg.GetClusterInfo()
 871   for nic in nics:
 872     ip = nic.ip
 873     mac = nic.mac
 874     filled_params = cluster.SimpleFillNIC(nic.nicparams)
 875     mode = filled_params[constants.NIC_MODE]
 876     link = filled_params[constants.NIC_LINK]
 877     hooks_nics.append((ip, mac, mode, link))
 878   return hooks_nics
 879
 880
 881 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
 882   """Builds instance related env variables for hooks from an object.
 883
 884   @type lu: L{LogicalUnit}
 885   @param lu: the logical unit on whose behalf we execute
 886   @type instance: L{objects.Instance}
 887   @param instance: the instance for which we should build the
 888       environment
 889   @type override: dict
 890   @param override: dictionary with key/values that will override
 891       our values
 892   @rtype: dict
 893   @return: the hook environment dictionary
 894
 895   """
 896   cluster = lu.cfg.GetClusterInfo()
 897   bep = cluster.FillBE(instance)
 898   hvp = cluster.FillHV(instance)
 899   args = {
 900     'name': instance.name,
 901     'primary_node': instance.primary_node,
 902     'secondary_nodes': instance.secondary_nodes,
 903     'os_type': instance.os,
 904     'status': instance.admin_up,
 905     'memory': bep[constants.BE_MEMORY],
 906     'vcpus': bep[constants.BE_VCPUS],
 907     'nics': _NICListToTuple(lu, instance.nics),
 908     'disk_template': instance.disk_template,
 909     'disks': [(disk.size, disk.mode) for disk in instance.disks],
 910     'bep': bep,
 911     'hvp': hvp,
 912     'hypervisor_name': instance.hypervisor,
 913   }
 914   if override:
 915     args.update(override)
 916   return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
 917
 918
 919 def _AdjustCandidatePool(lu, exceptions):
 920   """Adjust the candidate pool after node operations.
 921
 922   """
 923   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
 924   if mod_list:
 925     lu.LogInfo("Promoted nodes to master candidate role: %s",
 926                utils.CommaJoin(node.name for node in mod_list))
 927     for name in mod_list:
 928       lu.context.ReaddNode(name)
 929   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 930   if mc_now > mc_max:
 931     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
 932                (mc_now, mc_max))
 933
 934
 935 def _DecideSelfPromotion(lu, exceptions=None):
 936   """Decide whether I should promote myself as a master candidate.
 937
 938   """
 939   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
 940   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 941   # the new node will increase mc_max with one, so:
 942   mc_should = min(mc_should + 1, cp_size)
 943   return mc_now < mc_should
 944
 945
 946 def _CheckNicsBridgesExist(lu, target_nics, target_node):
 947   """Check that the brigdes needed by a list of nics exist.
 948
 949   """
 950   cluster = lu.cfg.GetClusterInfo()
 951   paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
 952   brlist = [params[constants.NIC_LINK] for params in paramslist
 953             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
 954   if brlist:
 955     result = lu.rpc.call_bridges_exist(target_node, brlist)
 956     result.Raise("Error checking bridges on destination node '%s'" %
 957                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
 958
 959
 960 def _CheckInstanceBridgesExist(lu, instance, node=None):
 961   """Check that the brigdes needed by an instance exist.
 962
 963   """
 964   if node is None:
 965     node = instance.primary_node
 966   _CheckNicsBridgesExist(lu, instance.nics, node)
 967
 968
 969 def _CheckOSVariant(os_obj, name):
 970   """Check whether an OS name conforms to the os variants specification.
 971
 972   @type os_obj: L{objects.OS}
 973   @param os_obj: OS object to check
 974   @type name: string
 975   @param name: OS name passed by the user, to check for validity
 976
 977   """
 978   if not os_obj.supported_variants:
 979     return
 980   variant = objects.OS.GetVariant(name)
 981   if not variant:
 982     raise errors.OpPrereqError("OS name must include a variant",
 983                                errors.ECODE_INVAL)
 984
 985   if variant not in os_obj.supported_variants:
 986     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
 987
 988
 989 def _GetNodeInstancesInner(cfg, fn):
 990   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
 991
 992
 993 def _GetNodeInstances(cfg, node_name):
 994   """Returns a list of all primary and secondary instances on a node.
 995
 996   """
 997
 998   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
 999
1000
1001 def _GetNodePrimaryInstances(cfg, node_name):
1002   """Returns primary instances on a node.
1003
1004   """
1005   return _GetNodeInstancesInner(cfg,
1006                                 lambda inst: node_name == inst.primary_node)
1007
1008
1009 def _GetNodeSecondaryInstances(cfg, node_name):
1010   """Returns secondary instances on a node.
1011
1012   """
1013   return _GetNodeInstancesInner(cfg,
1014                                 lambda inst: node_name in inst.secondary_nodes)
1015
1016
1017 def _GetStorageTypeArgs(cfg, storage_type):
1018   """Returns the arguments for a storage type.
1019
1020   """
1021   # Special case for file storage
1022   if storage_type == constants.ST_FILE:
1023     # storage.FileStorage wants a list of storage directories
1024     return [[cfg.GetFileStorageDir()]]
1025
1026   return []
1027
1028
1029 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1030   faulty = []
1031
1032   for dev in instance.disks:
1033     cfg.SetDiskID(dev, node_name)
1034
1035   result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1036   result.Raise("Failed to get disk status from node %s" % node_name,
1037                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1038
1039   for idx, bdev_status in enumerate(result.payload):
1040     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1041       faulty.append(idx)
1042
1043   return faulty
1044
1045
1046 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1047   """Check the sanity of iallocator and node arguments and use the
1048   cluster-wide iallocator if appropriate.
1049
1050   Check that at most one of (iallocator, node) is specified. If none is
1051   specified, then the LU's opcode's iallocator slot is filled with the
1052   cluster-wide default iallocator.
1053
1054   @type iallocator_slot: string
1055   @param iallocator_slot: the name of the opcode iallocator slot
1056   @type node_slot: string
1057   @param node_slot: the name of the opcode target node slot
1058
1059   """
1060   node = getattr(lu.op, node_slot, None)
1061   iallocator = getattr(lu.op, iallocator_slot, None)
1062
1063   if node is not None and iallocator is not None:
1064     raise errors.OpPrereqError("Do not specify both, iallocator and node.",
1065                                errors.ECODE_INVAL)
1066   elif node is None and iallocator is None:
1067     default_iallocator = lu.cfg.GetDefaultIAllocator()
1068     if default_iallocator:
1069       setattr(lu.op, iallocator_slot, default_iallocator)
1070     else:
1071       raise errors.OpPrereqError("No iallocator or node given and no"
1072                                  " cluster-wide default iallocator found."
1073                                  " Please specify either an iallocator or a"
1074                                  " node, or set a cluster-wide default"
1075                                  " iallocator.")
1076
1077
1078 class LUPostInitCluster(LogicalUnit):
1079   """Logical unit for running hooks after cluster initialization.
1080
1081   """
1082   HPATH = "cluster-init"
1083   HTYPE = constants.HTYPE_CLUSTER
1084
1085   def BuildHooksEnv(self):
1086     """Build hooks env.
1087
1088     """
1089     env = {"OP_TARGET": self.cfg.GetClusterName()}
1090     mn = self.cfg.GetMasterNode()
1091     return env, [], [mn]
1092
1093   def Exec(self, feedback_fn):
1094     """Nothing to do.
1095
1096     """
1097     return True
1098
1099
1100 class LUDestroyCluster(LogicalUnit):
1101   """Logical unit for destroying the cluster.
1102
1103   """
1104   HPATH = "cluster-destroy"
1105   HTYPE = constants.HTYPE_CLUSTER
1106
1107   def BuildHooksEnv(self):
1108     """Build hooks env.
1109
1110     """
1111     env = {"OP_TARGET": self.cfg.GetClusterName()}
1112     return env, [], []
1113
1114   def CheckPrereq(self):
1115     """Check prerequisites.
1116
1117     This checks whether the cluster is empty.
1118
1119     Any errors are signaled by raising errors.OpPrereqError.
1120
1121     """
1122     master = self.cfg.GetMasterNode()
1123
1124     nodelist = self.cfg.GetNodeList()
1125     if len(nodelist) != 1 or nodelist[0] != master:
1126       raise errors.OpPrereqError("There are still %d node(s) in"
1127                                  " this cluster." % (len(nodelist) - 1),
1128                                  errors.ECODE_INVAL)
1129     instancelist = self.cfg.GetInstanceList()
1130     if instancelist:
1131       raise errors.OpPrereqError("There are still %d instance(s) in"
1132                                  " this cluster." % len(instancelist),
1133                                  errors.ECODE_INVAL)
1134
1135   def Exec(self, feedback_fn):
1136     """Destroys the cluster.
1137
1138     """
1139     master = self.cfg.GetMasterNode()
1140
1141     # Run post hooks on master node before it's removed
1142     hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1143     try:
1144       hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1145     except:
1146       # pylint: disable-msg=W0702
1147       self.LogWarning("Errors occurred running hooks on %s" % master)
1148
1149     result = self.rpc.call_node_stop_master(master, False)
1150     result.Raise("Could not disable the master role")
1151
1152     return master
1153
1154
1155 def _VerifyCertificate(filename):
1156   """Verifies a certificate for LUVerifyCluster.
1157
1158   @type filename: string
1159   @param filename: Path to PEM file
1160
1161   """
1162   try:
1163     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1164                                            utils.ReadFile(filename))
1165   except Exception, err: # pylint: disable-msg=W0703
1166     return (LUVerifyCluster.ETYPE_ERROR,
1167             "Failed to load X509 certificate %s: %s" % (filename, err))
1168
1169   (errcode, msg) = \
1170     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1171                                 constants.SSL_CERT_EXPIRATION_ERROR)
1172
1173   if msg:
1174     fnamemsg = "While verifying %s: %s" % (filename, msg)
1175   else:
1176     fnamemsg = None
1177
1178   if errcode is None:
1179     return (None, fnamemsg)
1180   elif errcode == utils.CERT_WARNING:
1181     return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1182   elif errcode == utils.CERT_ERROR:
1183     return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1184
1185   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1186
1187
1188 class LUVerifyCluster(LogicalUnit):
1189   """Verifies the cluster status.
1190
1191   """
1192   HPATH = "cluster-verify"
1193   HTYPE = constants.HTYPE_CLUSTER
1194   _OP_PARAMS = [
1195     ("skip_checks", ht.EmptyList,
1196      ht.TListOf(ht.TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
1197     ("verbose", False, ht.TBool),
1198     ("error_codes", False, ht.TBool),
1199     ("debug_simulate_errors", False, ht.TBool),
1200     ]
1201   REQ_BGL = False
1202
1203   TCLUSTER = "cluster"
1204   TNODE = "node"
1205   TINSTANCE = "instance"
1206
1207   ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1208   ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1209   EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1210   EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1211   EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1212   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1213   EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1214   EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1215   ENODEDRBD = (TNODE, "ENODEDRBD")
1216   ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1217   ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1218   ENODEHOOKS = (TNODE, "ENODEHOOKS")
1219   ENODEHV = (TNODE, "ENODEHV")
1220   ENODELVM = (TNODE, "ENODELVM")
1221   ENODEN1 = (TNODE, "ENODEN1")
1222   ENODENET = (TNODE, "ENODENET")
1223   ENODEOS = (TNODE, "ENODEOS")
1224   ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1225   ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1226   ENODERPC = (TNODE, "ENODERPC")
1227   ENODESSH = (TNODE, "ENODESSH")
1228   ENODEVERSION = (TNODE, "ENODEVERSION")
1229   ENODESETUP = (TNODE, "ENODESETUP")
1230   ENODETIME = (TNODE, "ENODETIME")
1231
1232   ETYPE_FIELD = "code"
1233   ETYPE_ERROR = "ERROR"
1234   ETYPE_WARNING = "WARNING"
1235
1236   class NodeImage(object):
1237     """A class representing the logical and physical status of a node.
1238
1239     @type name: string
1240     @ivar name: the node name to which this object refers
1241     @ivar volumes: a structure as returned from
1242         L{ganeti.backend.GetVolumeList} (runtime)
1243     @ivar instances: a list of running instances (runtime)
1244     @ivar pinst: list of configured primary instances (config)
1245     @ivar sinst: list of configured secondary instances (config)
1246     @ivar sbp: diction of {secondary-node: list of instances} of all peers
1247         of this node (config)
1248     @ivar mfree: free memory, as reported by hypervisor (runtime)
1249     @ivar dfree: free disk, as reported by the node (runtime)
1250     @ivar offline: the offline status (config)
1251     @type rpc_fail: boolean
1252     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1253         not whether the individual keys were correct) (runtime)
1254     @type lvm_fail: boolean
1255     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1256     @type hyp_fail: boolean
1257     @ivar hyp_fail: whether the RPC call didn't return the instance list
1258     @type ghost: boolean
1259     @ivar ghost: whether this is a known node or not (config)
1260     @type os_fail: boolean
1261     @ivar os_fail: whether the RPC call didn't return valid OS data
1262     @type oslist: list
1263     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1264     @type vm_capable: boolean
1265     @ivar vm_capable: whether the node can host instances
1266
1267     """
1268     def __init__(self, offline=False, name=None, vm_capable=True):
1269       self.name = name
1270       self.volumes = {}
1271       self.instances = []
1272       self.pinst = []
1273       self.sinst = []
1274       self.sbp = {}
1275       self.mfree = 0
1276       self.dfree = 0
1277       self.offline = offline
1278       self.vm_capable = vm_capable
1279       self.rpc_fail = False
1280       self.lvm_fail = False
1281       self.hyp_fail = False
1282       self.ghost = False
1283       self.os_fail = False
1284       self.oslist = {}
1285
1286   def ExpandNames(self):
1287     self.needed_locks = {
1288       locking.LEVEL_NODE: locking.ALL_SET,
1289       locking.LEVEL_INSTANCE: locking.ALL_SET,
1290     }
1291     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1292
1293   def _Error(self, ecode, item, msg, *args, **kwargs):
1294     """Format an error message.
1295
1296     Based on the opcode's error_codes parameter, either format a
1297     parseable error code, or a simpler error string.
1298
1299     This must be called only from Exec and functions called from Exec.
1300
1301     """
1302     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1303     itype, etxt = ecode
1304     # first complete the msg
1305     if args:
1306       msg = msg % args
1307     # then format the whole message
1308     if self.op.error_codes:
1309       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1310     else:
1311       if item:
1312         item = " " + item
1313       else:
1314         item = ""
1315       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1316     # and finally report it via the feedback_fn
1317     self._feedback_fn("  - %s" % msg)
1318
1319   def _ErrorIf(self, cond, *args, **kwargs):
1320     """Log an error message if the passed condition is True.
1321
1322     """
1323     cond = bool(cond) or self.op.debug_simulate_errors
1324     if cond:
1325       self._Error(*args, **kwargs)
1326     # do not mark the operation as failed for WARN cases only
1327     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1328       self.bad = self.bad or cond
1329
1330   def _VerifyNode(self, ninfo, nresult):
1331     """Perform some basic validation on data returned from a node.
1332
1333       - check the result data structure is well formed and has all the
1334         mandatory fields
1335       - check ganeti version
1336
1337     @type ninfo: L{objects.Node}
1338     @param ninfo: the node to check
1339     @param nresult: the results from the node
1340     @rtype: boolean
1341     @return: whether overall this call was successful (and we can expect
1342          reasonable values in the respose)
1343
1344     """
1345     node = ninfo.name
1346     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1347
1348     # main result, nresult should be a non-empty dict
1349     test = not nresult or not isinstance(nresult, dict)
1350     _ErrorIf(test, self.ENODERPC, node,
1351                   "unable to verify node: no data returned")
1352     if test:
1353       return False
1354
1355     # compares ganeti version
1356     local_version = constants.PROTOCOL_VERSION
1357     remote_version = nresult.get("version", None)
1358     test = not (remote_version and
1359                 isinstance(remote_version, (list, tuple)) and
1360                 len(remote_version) == 2)
1361     _ErrorIf(test, self.ENODERPC, node,
1362              "connection to node returned invalid data")
1363     if test:
1364       return False
1365
1366     test = local_version != remote_version[0]
1367     _ErrorIf(test, self.ENODEVERSION, node,
1368              "incompatible protocol versions: master %s,"
1369              " node %s", local_version, remote_version[0])
1370     if test:
1371       return False
1372
1373     # node seems compatible, we can actually try to look into its results
1374
1375     # full package version
1376     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1377                   self.ENODEVERSION, node,
1378                   "software version mismatch: master %s, node %s",
1379                   constants.RELEASE_VERSION, remote_version[1],
1380                   code=self.ETYPE_WARNING)
1381
1382     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1383     if ninfo.vm_capable and isinstance(hyp_result, dict):
1384       for hv_name, hv_result in hyp_result.iteritems():
1385         test = hv_result is not None
1386         _ErrorIf(test, self.ENODEHV, node,
1387                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1388
1389     test = nresult.get(constants.NV_NODESETUP,
1390                            ["Missing NODESETUP results"])
1391     _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1392              "; ".join(test))
1393
1394     return True
1395
1396   def _VerifyNodeTime(self, ninfo, nresult,
1397                       nvinfo_starttime, nvinfo_endtime):
1398     """Check the node time.
1399
1400     @type ninfo: L{objects.Node}
1401     @param ninfo: the node to check
1402     @param nresult: the remote results for the node
1403     @param nvinfo_starttime: the start time of the RPC call
1404     @param nvinfo_endtime: the end time of the RPC call
1405
1406     """
1407     node = ninfo.name
1408     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1409
1410     ntime = nresult.get(constants.NV_TIME, None)
1411     try:
1412       ntime_merged = utils.MergeTime(ntime)
1413     except (ValueError, TypeError):
1414       _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1415       return
1416
1417     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1418       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1419     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1420       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1421     else:
1422       ntime_diff = None
1423
1424     _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1425              "Node time diverges by at least %s from master node time",
1426              ntime_diff)
1427
1428   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1429     """Check the node time.
1430
1431     @type ninfo: L{objects.Node}
1432     @param ninfo: the node to check
1433     @param nresult: the remote results for the node
1434     @param vg_name: the configured VG name
1435
1436     """
1437     if vg_name is None:
1438       return
1439
1440     node = ninfo.name
1441     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1442
1443     # checks vg existence and size > 20G
1444     vglist = nresult.get(constants.NV_VGLIST, None)
1445     test = not vglist
1446     _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1447     if not test:
1448       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1449                                             constants.MIN_VG_SIZE)
1450       _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1451
1452     # check pv names
1453     pvlist = nresult.get(constants.NV_PVLIST, None)
1454     test = pvlist is None
1455     _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1456     if not test:
1457       # check that ':' is not present in PV names, since it's a
1458       # special character for lvcreate (denotes the range of PEs to
1459       # use on the PV)
1460       for _, pvname, owner_vg in pvlist:
1461         test = ":" in pvname
1462         _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1463                  " '%s' of VG '%s'", pvname, owner_vg)
1464
1465   def _VerifyNodeNetwork(self, ninfo, nresult):
1466     """Check the node time.
1467
1468     @type ninfo: L{objects.Node}
1469     @param ninfo: the node to check
1470     @param nresult: the remote results for the node
1471
1472     """
1473     node = ninfo.name
1474     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1475
1476     test = constants.NV_NODELIST not in nresult
1477     _ErrorIf(test, self.ENODESSH, node,
1478              "node hasn't returned node ssh connectivity data")
1479     if not test:
1480       if nresult[constants.NV_NODELIST]:
1481         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1482           _ErrorIf(True, self.ENODESSH, node,
1483                    "ssh communication with node '%s': %s", a_node, a_msg)
1484
1485     test = constants.NV_NODENETTEST not in nresult
1486     _ErrorIf(test, self.ENODENET, node,
1487              "node hasn't returned node tcp connectivity data")
1488     if not test:
1489       if nresult[constants.NV_NODENETTEST]:
1490         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1491         for anode in nlist:
1492           _ErrorIf(True, self.ENODENET, node,
1493                    "tcp communication with node '%s': %s",
1494                    anode, nresult[constants.NV_NODENETTEST][anode])
1495
1496     test = constants.NV_MASTERIP not in nresult
1497     _ErrorIf(test, self.ENODENET, node,
1498              "node hasn't returned node master IP reachability data")
1499     if not test:
1500       if not nresult[constants.NV_MASTERIP]:
1501         if node == self.master_node:
1502           msg = "the master node cannot reach the master IP (not configured?)"
1503         else:
1504           msg = "cannot reach the master IP"
1505         _ErrorIf(True, self.ENODENET, node, msg)
1506
1507   def _VerifyInstance(self, instance, instanceconfig, node_image,
1508                       diskstatus):
1509     """Verify an instance.
1510
1511     This function checks to see if the required block devices are
1512     available on the instance's node.
1513
1514     """
1515     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1516     node_current = instanceconfig.primary_node
1517
1518     node_vol_should = {}
1519     instanceconfig.MapLVsByNode(node_vol_should)
1520
1521     for node in node_vol_should:
1522       n_img = node_image[node]
1523       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1524         # ignore missing volumes on offline or broken nodes
1525         continue
1526       for volume in node_vol_should[node]:
1527         test = volume not in n_img.volumes
1528         _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1529                  "volume %s missing on node %s", volume, node)
1530
1531     if instanceconfig.admin_up:
1532       pri_img = node_image[node_current]
1533       test = instance not in pri_img.instances and not pri_img.offline
1534       _ErrorIf(test, self.EINSTANCEDOWN, instance,
1535                "instance not running on its primary node %s",
1536                node_current)
1537
1538     for node, n_img in node_image.items():
1539       if (not node == node_current):
1540         test = instance in n_img.instances
1541         _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1542                  "instance should not run on node %s", node)
1543
1544     diskdata = [(nname, success, status, idx)
1545                 for (nname, disks) in diskstatus.items()
1546                 for idx, (success, status) in enumerate(disks)]
1547
1548     for nname, success, bdev_status, idx in diskdata:
1549       _ErrorIf(instanceconfig.admin_up and not success,
1550                self.EINSTANCEFAULTYDISK, instance,
1551                "couldn't retrieve status for disk/%s on %s: %s",
1552                idx, nname, bdev_status)
1553       _ErrorIf((instanceconfig.admin_up and success and
1554                 bdev_status.ldisk_status == constants.LDS_FAULTY),
1555                self.EINSTANCEFAULTYDISK, instance,
1556                "disk/%s on %s is faulty", idx, nname)
1557
1558   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1559     """Verify if there are any unknown volumes in the cluster.
1560
1561     The .os, .swap and backup volumes are ignored. All other volumes are
1562     reported as unknown.
1563
1564     @type reserved: L{ganeti.utils.FieldSet}
1565     @param reserved: a FieldSet of reserved volume names
1566
1567     """
1568     for node, n_img in node_image.items():
1569       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1570         # skip non-healthy nodes
1571         continue
1572       for volume in n_img.volumes:
1573         test = ((node not in node_vol_should or
1574                 volume not in node_vol_should[node]) and
1575                 not reserved.Matches(volume))
1576         self._ErrorIf(test, self.ENODEORPHANLV, node,
1577                       "volume %s is unknown", volume)
1578
1579   def _VerifyOrphanInstances(self, instancelist, node_image):
1580     """Verify the list of running instances.
1581
1582     This checks what instances are running but unknown to the cluster.
1583
1584     """
1585     for node, n_img in node_image.items():
1586       for o_inst in n_img.instances:
1587         test = o_inst not in instancelist
1588         self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1589                       "instance %s on node %s should not exist", o_inst, node)
1590
1591   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1592     """Verify N+1 Memory Resilience.
1593
1594     Check that if one single node dies we can still start all the
1595     instances it was primary for.
1596
1597     """
1598     for node, n_img in node_image.items():
1599       # This code checks that every node which is now listed as
1600       # secondary has enough memory to host all instances it is
1601       # supposed to should a single other node in the cluster fail.
1602       # FIXME: not ready for failover to an arbitrary node
1603       # FIXME: does not support file-backed instances
1604       # WARNING: we currently take into account down instances as well
1605       # as up ones, considering that even if they're down someone
1606       # might want to start them even in the event of a node failure.
1607       for prinode, instances in n_img.sbp.items():
1608         needed_mem = 0
1609         for instance in instances:
1610           bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1611           if bep[constants.BE_AUTO_BALANCE]:
1612             needed_mem += bep[constants.BE_MEMORY]
1613         test = n_img.mfree < needed_mem
1614         self._ErrorIf(test, self.ENODEN1, node,
1615                       "not enough memory on to accommodate"
1616                       " failovers should peer node %s fail", prinode)
1617
1618   def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1619                        master_files):
1620     """Verifies and computes the node required file checksums.
1621
1622     @type ninfo: L{objects.Node}
1623     @param ninfo: the node to check
1624     @param nresult: the remote results for the node
1625     @param file_list: required list of files
1626     @param local_cksum: dictionary of local files and their checksums
1627     @param master_files: list of files that only masters should have
1628
1629     """
1630     node = ninfo.name
1631     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1632
1633     remote_cksum = nresult.get(constants.NV_FILELIST, None)
1634     test = not isinstance(remote_cksum, dict)
1635     _ErrorIf(test, self.ENODEFILECHECK, node,
1636              "node hasn't returned file checksum data")
1637     if test:
1638       return
1639
1640     for file_name in file_list:
1641       node_is_mc = ninfo.master_candidate
1642       must_have = (file_name not in master_files) or node_is_mc
1643       # missing
1644       test1 = file_name not in remote_cksum
1645       # invalid checksum
1646       test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1647       # existing and good
1648       test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1649       _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1650                "file '%s' missing", file_name)
1651       _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1652                "file '%s' has wrong checksum", file_name)
1653       # not candidate and this is not a must-have file
1654       _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1655                "file '%s' should not exist on non master"
1656                " candidates (and the file is outdated)", file_name)
1657       # all good, except non-master/non-must have combination
1658       _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1659                "file '%s' should not exist"
1660                " on non master candidates", file_name)
1661
1662   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1663                       drbd_map):
1664     """Verifies and the node DRBD status.
1665
1666     @type ninfo: L{objects.Node}
1667     @param ninfo: the node to check
1668     @param nresult: the remote results for the node
1669     @param instanceinfo: the dict of instances
1670     @param drbd_helper: the configured DRBD usermode helper
1671     @param drbd_map: the DRBD map as returned by
1672         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1673
1674     """
1675     node = ninfo.name
1676     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1677
1678     if drbd_helper:
1679       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1680       test = (helper_result == None)
1681       _ErrorIf(test, self.ENODEDRBDHELPER, node,
1682                "no drbd usermode helper returned")
1683       if helper_result:
1684         status, payload = helper_result
1685         test = not status
1686         _ErrorIf(test, self.ENODEDRBDHELPER, node,
1687                  "drbd usermode helper check unsuccessful: %s", payload)
1688         test = status and (payload != drbd_helper)
1689         _ErrorIf(test, self.ENODEDRBDHELPER, node,
1690                  "wrong drbd usermode helper: %s", payload)
1691
1692     # compute the DRBD minors
1693     node_drbd = {}
1694     for minor, instance in drbd_map[node].items():
1695       test = instance not in instanceinfo
1696       _ErrorIf(test, self.ECLUSTERCFG, None,
1697                "ghost instance '%s' in temporary DRBD map", instance)
1698         # ghost instance should not be running, but otherwise we
1699         # don't give double warnings (both ghost instance and
1700         # unallocated minor in use)
1701       if test:
1702         node_drbd[minor] = (instance, False)
1703       else:
1704         instance = instanceinfo[instance]
1705         node_drbd[minor] = (instance.name, instance.admin_up)
1706
1707     # and now check them
1708     used_minors = nresult.get(constants.NV_DRBDLIST, [])
1709     test = not isinstance(used_minors, (tuple, list))
1710     _ErrorIf(test, self.ENODEDRBD, node,
1711              "cannot parse drbd status file: %s", str(used_minors))
1712     if test:
1713       # we cannot check drbd status
1714       return
1715
1716     for minor, (iname, must_exist) in node_drbd.items():
1717       test = minor not in used_minors and must_exist
1718       _ErrorIf(test, self.ENODEDRBD, node,
1719                "drbd minor %d of instance %s is not active", minor, iname)
1720     for minor in used_minors:
1721       test = minor not in node_drbd
1722       _ErrorIf(test, self.ENODEDRBD, node,
1723                "unallocated drbd minor %d is in use", minor)
1724
1725   def _UpdateNodeOS(self, ninfo, nresult, nimg):
1726     """Builds the node OS structures.
1727
1728     @type ninfo: L{objects.Node}
1729     @param ninfo: the node to check
1730     @param nresult: the remote results for the node
1731     @param nimg: the node image object
1732
1733     """
1734     node = ninfo.name
1735     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1736
1737     remote_os = nresult.get(constants.NV_OSLIST, None)
1738     test = (not isinstance(remote_os, list) or
1739             not compat.all(isinstance(v, list) and len(v) == 7
1740                            for v in remote_os))
1741
1742     _ErrorIf(test, self.ENODEOS, node,
1743              "node hasn't returned valid OS data")
1744
1745     nimg.os_fail = test
1746
1747     if test:
1748       return
1749
1750     os_dict = {}
1751
1752     for (name, os_path, status, diagnose,
1753          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1754
1755       if name not in os_dict:
1756         os_dict[name] = []
1757
1758       # parameters is a list of lists instead of list of tuples due to
1759       # JSON lacking a real tuple type, fix it:
1760       parameters = [tuple(v) for v in parameters]
1761       os_dict[name].append((os_path, status, diagnose,
1762                             set(variants), set(parameters), set(api_ver)))
1763
1764     nimg.oslist = os_dict
1765
1766   def _VerifyNodeOS(self, ninfo, nimg, base):
1767     """Verifies the node OS list.
1768
1769     @type ninfo: L{objects.Node}
1770     @param ninfo: the node to check
1771     @param nimg: the node image object
1772     @param base: the 'template' node we match against (e.g. from the master)
1773
1774     """
1775     node = ninfo.name
1776     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1777
1778     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1779
1780     for os_name, os_data in nimg.oslist.items():
1781       assert os_data, "Empty OS status for OS %s?!" % os_name
1782       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1783       _ErrorIf(not f_status, self.ENODEOS, node,
1784                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1785       _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1786                "OS '%s' has multiple entries (first one shadows the rest): %s",
1787                os_name, utils.CommaJoin([v[0] for v in os_data]))
1788       # this will catched in backend too
1789       _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1790                and not f_var, self.ENODEOS, node,
1791                "OS %s with API at least %d does not declare any variant",
1792                os_name, constants.OS_API_V15)
1793       # comparisons with the 'base' image
1794       test = os_name not in base.oslist
1795       _ErrorIf(test, self.ENODEOS, node,
1796                "Extra OS %s not present on reference node (%s)",
1797                os_name, base.name)
1798       if test:
1799         continue
1800       assert base.oslist[os_name], "Base node has empty OS status?"
1801       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1802       if not b_status:
1803         # base OS is invalid, skipping
1804         continue
1805       for kind, a, b in [("API version", f_api, b_api),
1806                          ("variants list", f_var, b_var),
1807                          ("parameters", f_param, b_param)]:
1808         _ErrorIf(a != b, self.ENODEOS, node,
1809                  "OS %s %s differs from reference node %s: %s vs. %s",
1810                  kind, os_name, base.name,
1811                  utils.CommaJoin(a), utils.CommaJoin(b))
1812
1813     # check any missing OSes
1814     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1815     _ErrorIf(missing, self.ENODEOS, node,
1816              "OSes present on reference node %s but missing on this node: %s",
1817              base.name, utils.CommaJoin(missing))
1818
1819   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1820     """Verifies and updates the node volume data.
1821
1822     This function will update a L{NodeImage}'s internal structures
1823     with data from the remote call.
1824
1825     @type ninfo: L{objects.Node}
1826     @param ninfo: the node to check
1827     @param nresult: the remote results for the node
1828     @param nimg: the node image object
1829     @param vg_name: the configured VG name
1830
1831     """
1832     node = ninfo.name
1833     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1834
1835     nimg.lvm_fail = True
1836     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1837     if vg_name is None:
1838       pass
1839     elif isinstance(lvdata, basestring):
1840       _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1841                utils.SafeEncode(lvdata))
1842     elif not isinstance(lvdata, dict):
1843       _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1844     else:
1845       nimg.volumes = lvdata
1846       nimg.lvm_fail = False
1847
1848   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1849     """Verifies and updates the node instance list.
1850
1851     If the listing was successful, then updates this node's instance
1852     list. Otherwise, it marks the RPC call as failed for the instance
1853     list key.
1854
1855     @type ninfo: L{objects.Node}
1856     @param ninfo: the node to check
1857     @param nresult: the remote results for the node
1858     @param nimg: the node image object
1859
1860     """
1861     idata = nresult.get(constants.NV_INSTANCELIST, None)
1862     test = not isinstance(idata, list)
1863     self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1864                   " (instancelist): %s", utils.SafeEncode(str(idata)))
1865     if test:
1866       nimg.hyp_fail = True
1867     else:
1868       nimg.instances = idata
1869
1870   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1871     """Verifies and computes a node information map
1872
1873     @type ninfo: L{objects.Node}
1874     @param ninfo: the node to check
1875     @param nresult: the remote results for the node
1876     @param nimg: the node image object
1877     @param vg_name: the configured VG name
1878
1879     """
1880     node = ninfo.name
1881     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1882
1883     # try to read free memory (from the hypervisor)
1884     hv_info = nresult.get(constants.NV_HVINFO, None)
1885     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1886     _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1887     if not test:
1888       try:
1889         nimg.mfree = int(hv_info["memory_free"])
1890       except (ValueError, TypeError):
1891         _ErrorIf(True, self.ENODERPC, node,
1892                  "node returned invalid nodeinfo, check hypervisor")
1893
1894     # FIXME: devise a free space model for file based instances as well
1895     if vg_name is not None:
1896       test = (constants.NV_VGLIST not in nresult or
1897               vg_name not in nresult[constants.NV_VGLIST])
1898       _ErrorIf(test, self.ENODELVM, node,
1899                "node didn't return data for the volume group '%s'"
1900                " - it is either missing or broken", vg_name)
1901       if not test:
1902         try:
1903           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1904         except (ValueError, TypeError):
1905           _ErrorIf(True, self.ENODERPC, node,
1906                    "node returned invalid LVM info, check LVM status")
1907
1908   def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
1909     """Gets per-disk status information for all instances.
1910
1911     @type nodelist: list of strings
1912     @param nodelist: Node names
1913     @type node_image: dict of (name, L{objects.Node})
1914     @param node_image: Node objects
1915     @type instanceinfo: dict of (name, L{objects.Instance})
1916     @param instanceinfo: Instance objects
1917
1918     """
1919     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1920
1921     node_disks = {}
1922     node_disks_devonly = {}
1923
1924     for nname in nodelist:
1925       disks = [(inst, disk)
1926                for instlist in [node_image[nname].pinst,
1927                                 node_image[nname].sinst]
1928                for inst in instlist
1929                for disk in instanceinfo[inst].disks]
1930
1931       if not disks:
1932         # No need to collect data
1933         continue
1934
1935       node_disks[nname] = disks
1936
1937       # Creating copies as SetDiskID below will modify the objects and that can
1938       # lead to incorrect data returned from nodes
1939       devonly = [dev.Copy() for (_, dev) in disks]
1940
1941       for dev in devonly:
1942         self.cfg.SetDiskID(dev, nname)
1943
1944       node_disks_devonly[nname] = devonly
1945
1946     assert len(node_disks) == len(node_disks_devonly)
1947
1948     # Collect data from all nodes with disks
1949     result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
1950                                                           node_disks_devonly)
1951
1952     assert len(result) == len(node_disks)
1953
1954     instdisk = {}
1955
1956     for (nname, nres) in result.items():
1957       if nres.offline:
1958         # Ignore offline node
1959         continue
1960
1961       disks = node_disks[nname]
1962
1963       msg = nres.fail_msg
1964       _ErrorIf(msg, self.ENODERPC, nname,
1965                "while getting disk information: %s", nres.fail_msg)
1966       if msg:
1967         # No data from this node
1968         data = len(disks) * [None]
1969       else:
1970         data = nres.payload
1971
1972       for ((inst, _), status) in zip(disks, data):
1973         instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
1974
1975     assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
1976                       len(nnames) <= len(instanceinfo[inst].all_nodes)
1977                       for inst, nnames in instdisk.items()
1978                       for nname, statuses in nnames.items())
1979
1980     return instdisk
1981
1982   def BuildHooksEnv(self):
1983     """Build hooks env.
1984
1985     Cluster-Verify hooks just ran in the post phase and their failure makes
1986     the output be logged in the verify output and the verification to fail.
1987
1988     """
1989     all_nodes = self.cfg.GetNodeList()
1990     env = {
1991       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1992       }
1993     for node in self.cfg.GetAllNodesInfo().values():
1994       env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1995
1996     return env, [], all_nodes
1997
1998   def Exec(self, feedback_fn):
1999     """Verify integrity of cluster, performing various test on nodes.
2000
2001     """
2002     self.bad = False
2003     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2004     verbose = self.op.verbose
2005     self._feedback_fn = feedback_fn
2006     feedback_fn("* Verifying global settings")
2007     for msg in self.cfg.VerifyConfig():
2008       _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2009
2010     # Check the cluster certificates
2011     for cert_filename in constants.ALL_CERT_FILES:
2012       (errcode, msg) = _VerifyCertificate(cert_filename)
2013       _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2014
2015     vg_name = self.cfg.GetVGName()
2016     drbd_helper = self.cfg.GetDRBDHelper()
2017     hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2018     cluster = self.cfg.GetClusterInfo()
2019     nodelist = utils.NiceSort(self.cfg.GetNodeList())
2020     nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2021     instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2022     instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2023                         for iname in instancelist)
2024     i_non_redundant = [] # Non redundant instances
2025     i_non_a_balanced = [] # Non auto-balanced instances
2026     n_offline = 0 # Count of offline nodes
2027     n_drained = 0 # Count of nodes being drained
2028     node_vol_should = {}
2029
2030     # FIXME: verify OS list
2031     # do local checksums
2032     master_files = [constants.CLUSTER_CONF_FILE]
2033     master_node = self.master_node = self.cfg.GetMasterNode()
2034     master_ip = self.cfg.GetMasterIP()
2035
2036     file_names = ssconf.SimpleStore().GetFileList()
2037     file_names.extend(constants.ALL_CERT_FILES)
2038     file_names.extend(master_files)
2039     if cluster.modify_etc_hosts:
2040       file_names.append(constants.ETC_HOSTS)
2041
2042     local_checksums = utils.FingerprintFiles(file_names)
2043
2044     feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2045     node_verify_param = {
2046       constants.NV_FILELIST: file_names,
2047       constants.NV_NODELIST: [node.name for node in nodeinfo
2048                               if not node.offline],
2049       constants.NV_HYPERVISOR: hypervisors,
2050       constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2051                                   node.secondary_ip) for node in nodeinfo
2052                                  if not node.offline],
2053       constants.NV_INSTANCELIST: hypervisors,
2054       constants.NV_VERSION: None,
2055       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2056       constants.NV_NODESETUP: None,
2057       constants.NV_TIME: None,
2058       constants.NV_MASTERIP: (master_node, master_ip),
2059       constants.NV_OSLIST: None,
2060       constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2061       }
2062
2063     if vg_name is not None:
2064       node_verify_param[constants.NV_VGLIST] = None
2065       node_verify_param[constants.NV_LVLIST] = vg_name
2066       node_verify_param[constants.NV_PVLIST] = [vg_name]
2067       node_verify_param[constants.NV_DRBDLIST] = None
2068
2069     if drbd_helper:
2070       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2071
2072     # Build our expected cluster state
2073     node_image = dict((node.name, self.NodeImage(offline=node.offline,
2074                                                  name=node.name,
2075                                                  vm_capable=node.vm_capable))
2076                       for node in nodeinfo)
2077
2078     for instance in instancelist:
2079       inst_config = instanceinfo[instance]
2080
2081       for nname in inst_config.all_nodes:
2082         if nname not in node_image:
2083           # ghost node
2084           gnode = self.NodeImage(name=nname)
2085           gnode.ghost = True
2086           node_image[nname] = gnode
2087
2088       inst_config.MapLVsByNode(node_vol_should)
2089
2090       pnode = inst_config.primary_node
2091       node_image[pnode].pinst.append(instance)
2092
2093       for snode in inst_config.secondary_nodes:
2094         nimg = node_image[snode]
2095         nimg.sinst.append(instance)
2096         if pnode not in nimg.sbp:
2097           nimg.sbp[pnode] = []
2098         nimg.sbp[pnode].append(instance)
2099
2100     # At this point, we have the in-memory data structures complete,
2101     # except for the runtime information, which we'll gather next
2102
2103     # Due to the way our RPC system works, exact response times cannot be
2104     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2105     # time before and after executing the request, we can at least have a time
2106     # window.
2107     nvinfo_starttime = time.time()
2108     all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2109                                            self.cfg.GetClusterName())
2110     nvinfo_endtime = time.time()
2111
2112     all_drbd_map = self.cfg.ComputeDRBDMap()
2113
2114     feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
2115     instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)
2116
2117     feedback_fn("* Verifying node status")
2118
2119     refos_img = None
2120
2121     for node_i in nodeinfo:
2122       node = node_i.name
2123       nimg = node_image[node]
2124
2125       if node_i.offline:
2126         if verbose:
2127           feedback_fn("* Skipping offline node %s" % (node,))
2128         n_offline += 1
2129         continue
2130
2131       if node == master_node:
2132         ntype = "master"
2133       elif node_i.master_candidate:
2134         ntype = "master candidate"
2135       elif node_i.drained:
2136         ntype = "drained"
2137         n_drained += 1
2138       else:
2139         ntype = "regular"
2140       if verbose:
2141         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2142
2143       msg = all_nvinfo[node].fail_msg
2144       _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2145       if msg:
2146         nimg.rpc_fail = True
2147         continue
2148
2149       nresult = all_nvinfo[node].payload
2150
2151       nimg.call_ok = self._VerifyNode(node_i, nresult)
2152       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2153       self._VerifyNodeNetwork(node_i, nresult)
2154       self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2155                             master_files)
2156
2157       if nimg.vm_capable:
2158         self._VerifyNodeLVM(node_i, nresult, vg_name)
2159         self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2160                              all_drbd_map)
2161
2162         self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2163         self._UpdateNodeInstances(node_i, nresult, nimg)
2164         self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2165         self._UpdateNodeOS(node_i, nresult, nimg)
2166         if not nimg.os_fail:
2167           if refos_img is None:
2168             refos_img = nimg
2169           self._VerifyNodeOS(node_i, nimg, refos_img)
2170
2171     feedback_fn("* Verifying instance status")
2172     for instance in instancelist:
2173       if verbose:
2174         feedback_fn("* Verifying instance %s" % instance)
2175       inst_config = instanceinfo[instance]
2176       self._VerifyInstance(instance, inst_config, node_image,
2177                            instdisk[instance])
2178       inst_nodes_offline = []
2179
2180       pnode = inst_config.primary_node
2181       pnode_img = node_image[pnode]
2182       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2183                self.ENODERPC, pnode, "instance %s, connection to"
2184                " primary node failed", instance)
2185
2186       if pnode_img.offline:
2187         inst_nodes_offline.append(pnode)
2188
2189       # If the instance is non-redundant we cannot survive losing its primary
2190       # node, so we are not N+1 compliant. On the other hand we have no disk
2191       # templates with more than one secondary so that situation is not well
2192       # supported either.
2193       # FIXME: does not support file-backed instances
2194       if not inst_config.secondary_nodes:
2195         i_non_redundant.append(instance)
2196       _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2197                instance, "instance has multiple secondary nodes: %s",
2198                utils.CommaJoin(inst_config.secondary_nodes),
2199                code=self.ETYPE_WARNING)
2200
2201       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2202         i_non_a_balanced.append(instance)
2203
2204       for snode in inst_config.secondary_nodes:
2205         s_img = node_image[snode]
2206         _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2207                  "instance %s, connection to secondary node failed", instance)
2208
2209         if s_img.offline:
2210           inst_nodes_offline.append(snode)
2211
2212       # warn that the instance lives on offline nodes
2213       _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2214                "instance lives on offline node(s) %s",
2215                utils.CommaJoin(inst_nodes_offline))
2216       # ... or ghost/non-vm_capable nodes
2217       for node in inst_config.all_nodes:
2218         _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2219                  "instance lives on ghost node %s", node)
2220         _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2221                  instance, "instance lives on non-vm_capable node %s", node)
2222
2223     feedback_fn("* Verifying orphan volumes")
2224     reserved = utils.FieldSet(*cluster.reserved_lvs)
2225     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2226
2227     feedback_fn("* Verifying orphan instances")
2228     self._VerifyOrphanInstances(instancelist, node_image)
2229
2230     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2231       feedback_fn("* Verifying N+1 Memory redundancy")
2232       self._VerifyNPlusOneMemory(node_image, instanceinfo)
2233
2234     feedback_fn("* Other Notes")
2235     if i_non_redundant:
2236       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2237                   % len(i_non_redundant))
2238
2239     if i_non_a_balanced:
2240       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2241                   % len(i_non_a_balanced))
2242
2243     if n_offline:
2244       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2245
2246     if n_drained:
2247       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2248
2249     return not self.bad
2250
2251   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2252     """Analyze the post-hooks' result
2253
2254     This method analyses the hook result, handles it, and sends some
2255     nicely-formatted feedback back to the user.
2256
2257     @param phase: one of L{constants.HOOKS_PHASE_POST} or
2258         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2259     @param hooks_results: the results of the multi-node hooks rpc call
2260     @param feedback_fn: function used send feedback back to the caller
2261     @param lu_result: previous Exec result
2262     @return: the new Exec result, based on the previous result
2263         and hook results
2264
2265     """
2266     # We only really run POST phase hooks, and are only interested in
2267     # their results
2268     if phase == constants.HOOKS_PHASE_POST:
2269       # Used to change hooks' output to proper indentation
2270       indent_re = re.compile('^', re.M)
2271       feedback_fn("* Hooks Results")
2272       assert hooks_results, "invalid result from hooks"
2273
2274       for node_name in hooks_results:
2275         res = hooks_results[node_name]
2276         msg = res.fail_msg
2277         test = msg and not res.offline
2278         self._ErrorIf(test, self.ENODEHOOKS, node_name,
2279                       "Communication failure in hooks execution: %s", msg)
2280         if res.offline or msg:
2281           # No need to investigate payload if node is offline or gave an error.
2282           # override manually lu_result here as _ErrorIf only
2283           # overrides self.bad
2284           lu_result = 1
2285           continue
2286         for script, hkr, output in res.payload:
2287           test = hkr == constants.HKR_FAIL
2288           self._ErrorIf(test, self.ENODEHOOKS, node_name,
2289                         "Script %s failed, output:", script)
2290           if test:
2291             output = indent_re.sub('      ', output)
2292             feedback_fn("%s" % output)
2293             lu_result = 0
2294
2295       return lu_result
2296
2297
2298 class LUVerifyDisks(NoHooksLU):
2299   """Verifies the cluster disks status.
2300
2301   """
2302   REQ_BGL = False
2303
2304   def ExpandNames(self):
2305     self.needed_locks = {
2306       locking.LEVEL_NODE: locking.ALL_SET,
2307       locking.LEVEL_INSTANCE: locking.ALL_SET,
2308     }
2309     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2310
2311   def Exec(self, feedback_fn):
2312     """Verify integrity of cluster disks.
2313
2314     @rtype: tuple of three items
2315     @return: a tuple of (dict of node-to-node_error, list of instances
2316         which need activate-disks, dict of instance: (node, volume) for
2317         missing volumes
2318
2319     """
2320     result = res_nodes, res_instances, res_missing = {}, [], {}
2321
2322     vg_name = self.cfg.GetVGName()
2323     nodes = utils.NiceSort(self.cfg.GetNodeList())
2324     instances = [self.cfg.GetInstanceInfo(name)
2325                  for name in self.cfg.GetInstanceList()]
2326
2327     nv_dict = {}
2328     for inst in instances:
2329       inst_lvs = {}
2330       if (not inst.admin_up or
2331           inst.disk_template not in constants.DTS_NET_MIRROR):
2332         continue
2333       inst.MapLVsByNode(inst_lvs)
2334       # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2335       for node, vol_list in inst_lvs.iteritems():
2336         for vol in vol_list:
2337           nv_dict[(node, vol)] = inst
2338
2339     if not nv_dict:
2340       return result
2341
2342     node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2343
2344     for node in nodes:
2345       # node_volume
2346       node_res = node_lvs[node]
2347       if node_res.offline:
2348         continue
2349       msg = node_res.fail_msg
2350       if msg:
2351         logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2352         res_nodes[node] = msg
2353         continue
2354
2355       lvs = node_res.payload
2356       for lv_name, (_, _, lv_online) in lvs.items():
2357         inst = nv_dict.pop((node, lv_name), None)
2358         if (not lv_online and inst is not None
2359             and inst.name not in res_instances):
2360           res_instances.append(inst.name)
2361
2362     # any leftover items in nv_dict are missing LVs, let's arrange the
2363     # data better
2364     for key, inst in nv_dict.iteritems():
2365       if inst.name not in res_missing:
2366         res_missing[inst.name] = []
2367       res_missing[inst.name].append(key)
2368
2369     return result
2370
2371
2372 class LURepairDiskSizes(NoHooksLU):
2373   """Verifies the cluster disks sizes.
2374
2375   """
2376   _OP_PARAMS = [("instances", ht.EmptyList, ht.TListOf(ht.TNonEmptyString))]
2377   REQ_BGL = False
2378
2379   def ExpandNames(self):
2380     if self.op.instances:
2381       self.wanted_names = []
2382       for name in self.op.instances:
2383         full_name = _ExpandInstanceName(self.cfg, name)
2384         self.wanted_names.append(full_name)
2385       self.needed_locks = {
2386         locking.LEVEL_NODE: [],
2387         locking.LEVEL_INSTANCE: self.wanted_names,
2388         }
2389       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2390     else:
2391       self.wanted_names = None
2392       self.needed_locks = {
2393         locking.LEVEL_NODE: locking.ALL_SET,
2394         locking.LEVEL_INSTANCE: locking.ALL_SET,
2395         }
2396     self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2397
2398   def DeclareLocks(self, level):
2399     if level == locking.LEVEL_NODE and self.wanted_names is not None:
2400       self._LockInstancesNodes(primary_only=True)
2401
2402   def CheckPrereq(self):
2403     """Check prerequisites.
2404
2405     This only checks the optional instance list against the existing names.
2406
2407     """
2408     if self.wanted_names is None:
2409       self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2410
2411     self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2412                              in self.wanted_names]
2413
2414   def _EnsureChildSizes(self, disk):
2415     """Ensure children of the disk have the needed disk size.
2416
2417     This is valid mainly for DRBD8 and fixes an issue where the
2418     children have smaller disk size.
2419
2420     @param disk: an L{ganeti.objects.Disk} object
2421
2422     """
2423     if disk.dev_type == constants.LD_DRBD8:
2424       assert disk.children, "Empty children for DRBD8?"
2425       fchild = disk.children[0]
2426       mismatch = fchild.size < disk.size
2427       if mismatch:
2428         self.LogInfo("Child disk has size %d, parent %d, fixing",
2429                      fchild.size, disk.size)
2430         fchild.size = disk.size
2431
2432       # and we recurse on this child only, not on the metadev
2433       return self._EnsureChildSizes(fchild) or mismatch
2434     else:
2435       return False
2436
2437   def Exec(self, feedback_fn):
2438     """Verify the size of cluster disks.
2439
2440     """
2441     # TODO: check child disks too
2442     # TODO: check differences in size between primary/secondary nodes
2443     per_node_disks = {}
2444     for instance in self.wanted_instances:
2445       pnode = instance.primary_node
2446       if pnode not in per_node_disks:
2447         per_node_disks[pnode] = []
2448       for idx, disk in enumerate(instance.disks):
2449         per_node_disks[pnode].append((instance, idx, disk))
2450
2451     changed = []
2452     for node, dskl in per_node_disks.items():
2453       newl = [v[2].Copy() for v in dskl]
2454       for dsk in newl:
2455         self.cfg.SetDiskID(dsk, node)
2456       result = self.rpc.call_blockdev_getsizes(node, newl)
2457       if result.fail_msg:
2458         self.LogWarning("Failure in blockdev_getsizes call to node"
2459                         " %s, ignoring", node)
2460         continue
2461       if len(result.data) != len(dskl):
2462         self.LogWarning("Invalid result from node %s, ignoring node results",
2463                         node)
2464         continue
2465       for ((instance, idx, disk), size) in zip(dskl, result.data):
2466         if size is None:
2467           self.LogWarning("Disk %d of instance %s did not return size"
2468                           " information, ignoring", idx, instance.name)
2469           continue
2470         if not isinstance(size, (int, long)):
2471           self.LogWarning("Disk %d of instance %s did not return valid"
2472                           " size information, ignoring", idx, instance.name)
2473           continue
2474         size = size >> 20
2475         if size != disk.size:
2476           self.LogInfo("Disk %d of instance %s has mismatched size,"
2477                        " correcting: recorded %d, actual %d", idx,
2478                        instance.name, disk.size, size)
2479           disk.size = size
2480           self.cfg.Update(instance, feedback_fn)
2481           changed.append((instance.name, idx, size))
2482         if self._EnsureChildSizes(disk):
2483           self.cfg.Update(instance, feedback_fn)
2484           changed.append((instance.name, idx, disk.size))
2485     return changed
2486
2487
2488 class LURenameCluster(LogicalUnit):
2489   """Rename the cluster.
2490
2491   """
2492   HPATH = "cluster-rename"
2493   HTYPE = constants.HTYPE_CLUSTER
2494   _OP_PARAMS = [("name", ht.NoDefault, ht.TNonEmptyString)]
2495
2496   def BuildHooksEnv(self):
2497     """Build hooks env.
2498
2499     """
2500     env = {
2501       "OP_TARGET": self.cfg.GetClusterName(),
2502       "NEW_NAME": self.op.name,
2503       }
2504     mn = self.cfg.GetMasterNode()
2505     all_nodes = self.cfg.GetNodeList()
2506     return env, [mn], all_nodes
2507
2508   def CheckPrereq(self):
2509     """Verify that the passed name is a valid one.
2510
2511     """
2512     hostname = netutils.GetHostname(name=self.op.name,
2513                                     family=self.cfg.GetPrimaryIPFamily())
2514
2515     new_name = hostname.name
2516     self.ip = new_ip = hostname.ip
2517     old_name = self.cfg.GetClusterName()
2518     old_ip = self.cfg.GetMasterIP()
2519     if new_name == old_name and new_ip == old_ip:
2520       raise errors.OpPrereqError("Neither the name nor the IP address of the"
2521                                  " cluster has changed",
2522                                  errors.ECODE_INVAL)
2523     if new_ip != old_ip:
2524       if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2525         raise errors.OpPrereqError("The given cluster IP address (%s) is"
2526                                    " reachable on the network" %
2527                                    new_ip, errors.ECODE_NOTUNIQUE)
2528
2529     self.op.name = new_name
2530
2531   def Exec(self, feedback_fn):
2532     """Rename the cluster.
2533
2534     """
2535     clustername = self.op.name
2536     ip = self.ip
2537
2538     # shutdown the master IP
2539     master = self.cfg.GetMasterNode()
2540     result = self.rpc.call_node_stop_master(master, False)
2541     result.Raise("Could not disable the master role")
2542
2543     try:
2544       cluster = self.cfg.GetClusterInfo()
2545       cluster.cluster_name = clustername
2546       cluster.master_ip = ip
2547       self.cfg.Update(cluster, feedback_fn)
2548
2549       # update the known hosts file
2550       ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2551       node_list = self.cfg.GetNodeList()
2552       try:
2553         node_list.remove(master)
2554       except ValueError:
2555         pass
2556       _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
2557     finally:
2558       result = self.rpc.call_node_start_master(master, False, False)
2559       msg = result.fail_msg
2560       if msg:
2561         self.LogWarning("Could not re-enable the master role on"
2562                         " the master, please restart manually: %s", msg)
2563
2564     return clustername
2565
2566
2567 class LUSetClusterParams(LogicalUnit):
2568   """Change the parameters of the cluster.
2569
2570   """
2571   HPATH = "cluster-modify"
2572   HTYPE = constants.HTYPE_CLUSTER
2573   _OP_PARAMS = [
2574     ("vg_name", None, ht.TMaybeString),
2575     ("enabled_hypervisors", None,
2576      ht.TOr(ht.TAnd(ht.TListOf(ht.TElemOf(constants.HYPER_TYPES)), ht.TTrue),
2577             ht.TNone)),
2578     ("hvparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2579                               ht.TNone)),
2580     ("beparams", None, ht.TOr(ht.TDict, ht.TNone)),
2581     ("os_hvp", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2582                             ht.TNone)),
2583     ("osparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2584                               ht.TNone)),
2585     ("candidate_pool_size", None, ht.TOr(ht.TStrictPositiveInt, ht.TNone)),
2586     ("uid_pool", None, ht.NoType),
2587     ("add_uids", None, ht.NoType),
2588     ("remove_uids", None, ht.NoType),
2589     ("maintain_node_health", None, ht.TMaybeBool),
2590     ("prealloc_wipe_disks", None, ht.TMaybeBool),
2591     ("nicparams", None, ht.TOr(ht.TDict, ht.TNone)),
2592     ("ndparams", None, ht.TOr(ht.TDict, ht.TNone)),
2593     ("drbd_helper", None, ht.TOr(ht.TString, ht.TNone)),
2594     ("default_iallocator", None, ht.TOr(ht.TString, ht.TNone)),
2595     ("reserved_lvs", None, ht.TOr(ht.TListOf(ht.TNonEmptyString), ht.TNone)),
2596     ("hidden_os", None, ht.TOr(ht.TListOf(\
2597           ht.TAnd(ht.TList,
2598                 ht.TIsLength(2),
2599                 ht.TMap(lambda v: v[0], ht.TElemOf(constants.DDMS_VALUES)))),
2600           ht.TNone)),
2601     ("blacklisted_os", None, ht.TOr(ht.TListOf(\
2602           ht.TAnd(ht.TList,
2603                 ht.TIsLength(2),
2604                 ht.TMap(lambda v: v[0], ht.TElemOf(constants.DDMS_VALUES)))),
2605           ht.TNone)),
2606     ]
2607   REQ_BGL = False
2608
2609   def CheckArguments(self):
2610     """Check parameters
2611
2612     """
2613     if self.op.uid_pool:
2614       uidpool.CheckUidPool(self.op.uid_pool)
2615
2616     if self.op.add_uids:
2617       uidpool.CheckUidPool(self.op.add_uids)
2618
2619     if self.op.remove_uids:
2620       uidpool.CheckUidPool(self.op.remove_uids)
2621
2622   def ExpandNames(self):
2623     # FIXME: in the future maybe other cluster params won't require checking on
2624     # all nodes to be modified.
2625     self.needed_locks = {
2626       locking.LEVEL_NODE: locking.ALL_SET,
2627     }
2628     self.share_locks[locking.LEVEL_NODE] = 1
2629
2630   def BuildHooksEnv(self):
2631     """Build hooks env.
2632
2633     """
2634     env = {
2635       "OP_TARGET": self.cfg.GetClusterName(),
2636       "NEW_VG_NAME": self.op.vg_name,
2637       }
2638     mn = self.cfg.GetMasterNode()
2639     return env, [mn], [mn]
2640
2641   def CheckPrereq(self):
2642     """Check prerequisites.
2643
2644     This checks whether the given params don't conflict and
2645     if the given volume group is valid.
2646
2647     """
2648     if self.op.vg_name is not None and not self.op.vg_name:
2649       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2650         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2651                                    " instances exist", errors.ECODE_INVAL)
2652
2653     if self.op.drbd_helper is not None and not self.op.drbd_helper:
2654       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2655         raise errors.OpPrereqError("Cannot disable drbd helper while"
2656                                    " drbd-based instances exist",
2657                                    errors.ECODE_INVAL)
2658
2659     node_list = self.acquired_locks[locking.LEVEL_NODE]
2660
2661     # if vg_name not None, checks given volume group on all nodes
2662     if self.op.vg_name:
2663       vglist = self.rpc.call_vg_list(node_list)
2664       for node in node_list:
2665         msg = vglist[node].fail_msg
2666         if msg:
2667           # ignoring down node
2668           self.LogWarning("Error while gathering data on node %s"
2669                           " (ignoring node): %s", node, msg)
2670           continue
2671         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2672                                               self.op.vg_name,
2673                                               constants.MIN_VG_SIZE)
2674         if vgstatus:
2675           raise errors.OpPrereqError("Error on node '%s': %s" %
2676                                      (node, vgstatus), errors.ECODE_ENVIRON)
2677
2678     if self.op.drbd_helper:
2679       # checks given drbd helper on all nodes
2680       helpers = self.rpc.call_drbd_helper(node_list)
2681       for node in node_list:
2682         ninfo = self.cfg.GetNodeInfo(node)
2683         if ninfo.offline:
2684           self.LogInfo("Not checking drbd helper on offline node %s", node)
2685           continue
2686         msg = helpers[node].fail_msg
2687         if msg:
2688           raise errors.OpPrereqError("Error checking drbd helper on node"
2689                                      " '%s': %s" % (node, msg),
2690                                      errors.ECODE_ENVIRON)
2691         node_helper = helpers[node].payload
2692         if node_helper != self.op.drbd_helper:
2693           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2694                                      (node, node_helper), errors.ECODE_ENVIRON)
2695
2696     self.cluster = cluster = self.cfg.GetClusterInfo()
2697     # validate params changes
2698     if self.op.beparams:
2699       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2700       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2701
2702     if self.op.ndparams:
2703       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
2704       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
2705
2706     if self.op.nicparams:
2707       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2708       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2709       objects.NIC.CheckParameterSyntax(self.new_nicparams)
2710       nic_errors = []
2711
2712       # check all instances for consistency
2713       for instance in self.cfg.GetAllInstancesInfo().values():
2714         for nic_idx, nic in enumerate(instance.nics):
2715           params_copy = copy.deepcopy(nic.nicparams)
2716           params_filled = objects.FillDict(self.new_nicparams, params_copy)
2717
2718           # check parameter syntax
2719           try:
2720             objects.NIC.CheckParameterSyntax(params_filled)
2721           except errors.ConfigurationError, err:
2722             nic_errors.append("Instance %s, nic/%d: %s" %
2723                               (instance.name, nic_idx, err))
2724
2725           # if we're moving instances to routed, check that they have an ip
2726           target_mode = params_filled[constants.NIC_MODE]
2727           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2728             nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2729                               (instance.name, nic_idx))
2730       if nic_errors:
2731         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2732                                    "\n".join(nic_errors))
2733
2734     # hypervisor list/parameters
2735     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2736     if self.op.hvparams:
2737       for hv_name, hv_dict in self.op.hvparams.items():
2738         if hv_name not in self.new_hvparams:
2739           self.new_hvparams[hv_name] = hv_dict
2740         else:
2741           self.new_hvparams[hv_name].update(hv_dict)
2742
2743     # os hypervisor parameters
2744     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2745     if self.op.os_hvp:
2746       for os_name, hvs in self.op.os_hvp.items():
2747         if os_name not in self.new_os_hvp:
2748           self.new_os_hvp[os_name] = hvs
2749         else:
2750           for hv_name, hv_dict in hvs.items():
2751             if hv_name not in self.new_os_hvp[os_name]:
2752               self.new_os_hvp[os_name][hv_name] = hv_dict
2753             else:
2754               self.new_os_hvp[os_name][hv_name].update(hv_dict)
2755
2756     # os parameters
2757     self.new_osp = objects.FillDict(cluster.osparams, {})
2758     if self.op.osparams:
2759       for os_name, osp in self.op.osparams.items():
2760         if os_name not in self.new_osp:
2761           self.new_osp[os_name] = {}
2762
2763         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2764                                                   use_none=True)
2765
2766         if not self.new_osp[os_name]:
2767           # we removed all parameters
2768           del self.new_osp[os_name]
2769         else:
2770           # check the parameter validity (remote check)
2771           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2772                          os_name, self.new_osp[os_name])
2773
2774     # changes to the hypervisor list
2775     if self.op.enabled_hypervisors is not None:
2776       self.hv_list = self.op.enabled_hypervisors
2777       for hv in self.hv_list:
2778         # if the hypervisor doesn't already exist in the cluster
2779         # hvparams, we initialize it to empty, and then (in both
2780         # cases) we make sure to fill the defaults, as we might not
2781         # have a complete defaults list if the hypervisor wasn't
2782         # enabled before
2783         if hv not in new_hvp:
2784           new_hvp[hv] = {}
2785         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2786         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2787     else:
2788       self.hv_list = cluster.enabled_hypervisors
2789
2790     if self.op.hvparams or self.op.enabled_hypervisors is not None:
2791       # either the enabled list has changed, or the parameters have, validate
2792       for hv_name, hv_params in self.new_hvparams.items():
2793         if ((self.op.hvparams and hv_name in self.op.hvparams) or
2794             (self.op.enabled_hypervisors and
2795              hv_name in self.op.enabled_hypervisors)):
2796           # either this is a new hypervisor, or its parameters have changed
2797           hv_class = hypervisor.GetHypervisor(hv_name)
2798           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2799           hv_class.CheckParameterSyntax(hv_params)
2800           _CheckHVParams(self, node_list, hv_name, hv_params)
2801
2802     if self.op.os_hvp:
2803       # no need to check any newly-enabled hypervisors, since the
2804       # defaults have already been checked in the above code-block
2805       for os_name, os_hvp in self.new_os_hvp.items():
2806         for hv_name, hv_params in os_hvp.items():
2807           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2808           # we need to fill in the new os_hvp on top of the actual hv_p
2809           cluster_defaults = self.new_hvparams.get(hv_name, {})
2810           new_osp = objects.FillDict(cluster_defaults, hv_params)
2811           hv_class = hypervisor.GetHypervisor(hv_name)
2812           hv_class.CheckParameterSyntax(new_osp)
2813           _CheckHVParams(self, node_list, hv_name, new_osp)
2814
2815     if self.op.default_iallocator:
2816       alloc_script = utils.FindFile(self.op.default_iallocator,
2817                                     constants.IALLOCATOR_SEARCH_PATH,
2818                                     os.path.isfile)
2819       if alloc_script is None:
2820         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2821                                    " specified" % self.op.default_iallocator,
2822                                    errors.ECODE_INVAL)
2823
2824   def Exec(self, feedback_fn):
2825     """Change the parameters of the cluster.
2826
2827     """
2828     if self.op.vg_name is not None:
2829       new_volume = self.op.vg_name
2830       if not new_volume:
2831         new_volume = None
2832       if new_volume != self.cfg.GetVGName():
2833         self.cfg.SetVGName(new_volume)
2834       else:
2835         feedback_fn("Cluster LVM configuration already in desired"
2836                     " state, not changing")
2837     if self.op.drbd_helper is not None:
2838       new_helper = self.op.drbd_helper
2839       if not new_helper:
2840         new_helper = None
2841       if new_helper != self.cfg.GetDRBDHelper():
2842         self.cfg.SetDRBDHelper(new_helper)
2843       else:
2844         feedback_fn("Cluster DRBD helper already in desired state,"
2845                     " not changing")
2846     if self.op.hvparams:
2847       self.cluster.hvparams = self.new_hvparams
2848     if self.op.os_hvp:
2849       self.cluster.os_hvp = self.new_os_hvp
2850     if self.op.enabled_hypervisors is not None:
2851       self.cluster.hvparams = self.new_hvparams
2852       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2853     if self.op.beparams:
2854       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2855     if self.op.nicparams:
2856       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2857     if self.op.osparams:
2858       self.cluster.osparams = self.new_osp
2859     if self.op.ndparams:
2860       self.cluster.ndparams = self.new_ndparams
2861
2862     if self.op.candidate_pool_size is not None:
2863       self.cluster.candidate_pool_size = self.op.candidate_pool_size
2864       # we need to update the pool size here, otherwise the save will fail
2865       _AdjustCandidatePool(self, [])
2866
2867     if self.op.maintain_node_health is not None:
2868       self.cluster.maintain_node_health = self.op.maintain_node_health
2869
2870     if self.op.prealloc_wipe_disks is not None:
2871       self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
2872
2873     if self.op.add_uids is not None:
2874       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2875
2876     if self.op.remove_uids is not None:
2877       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2878
2879     if self.op.uid_pool is not None:
2880       self.cluster.uid_pool = self.op.uid_pool
2881
2882     if self.op.default_iallocator is not None:
2883       self.cluster.default_iallocator = self.op.default_iallocator
2884
2885     if self.op.reserved_lvs is not None:
2886       self.cluster.reserved_lvs = self.op.reserved_lvs
2887
2888     def helper_os(aname, mods, desc):
2889       desc += " OS list"
2890       lst = getattr(self.cluster, aname)
2891       for key, val in mods:
2892         if key == constants.DDM_ADD:
2893           if val in lst:
2894             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
2895           else:
2896             lst.append(val)
2897         elif key == constants.DDM_REMOVE:
2898           if val in lst:
2899             lst.remove(val)
2900           else:
2901             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
2902         else:
2903           raise errors.ProgrammerError("Invalid modification '%s'" % key)
2904
2905     if self.op.hidden_os:
2906       helper_os("hidden_os", self.op.hidden_os, "hidden")
2907
2908     if self.op.blacklisted_os:
2909       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
2910
2911     self.cfg.Update(self.cluster, feedback_fn)
2912
2913
2914 def _UploadHelper(lu, nodes, fname):
2915   """Helper for uploading a file and showing warnings.
2916
2917   """
2918   if os.path.exists(fname):
2919     result = lu.rpc.call_upload_file(nodes, fname)
2920     for to_node, to_result in result.items():
2921       msg = to_result.fail_msg
2922       if msg:
2923         msg = ("Copy of file %s to node %s failed: %s" %
2924                (fname, to_node, msg))
2925         lu.proc.LogWarning(msg)
2926
2927
2928 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
2929   """Distribute additional files which are part of the cluster configuration.
2930
2931   ConfigWriter takes care of distributing the config and ssconf files, but
2932   there are more files which should be distributed to all nodes. This function
2933   makes sure those are copied.
2934
2935   @param lu: calling logical unit
2936   @param additional_nodes: list of nodes not in the config to distribute to
2937   @type additional_vm: boolean
2938   @param additional_vm: whether the additional nodes are vm-capable or not
2939
2940   """
2941   # 1. Gather target nodes
2942   myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2943   dist_nodes = lu.cfg.GetOnlineNodeList()
2944   nvm_nodes = lu.cfg.GetNonVmCapableNodeList()
2945   vm_nodes = [name for name in dist_nodes if name not in nvm_nodes]
2946   if additional_nodes is not None:
2947     dist_nodes.extend(additional_nodes)
2948     if additional_vm:
2949       vm_nodes.extend(additional_nodes)
2950   if myself.name in dist_nodes:
2951     dist_nodes.remove(myself.name)
2952   if myself.name in vm_nodes:
2953     vm_nodes.remove(myself.name)
2954
2955   # 2. Gather files to distribute
2956   dist_files = set([constants.ETC_HOSTS,
2957                     constants.SSH_KNOWN_HOSTS_FILE,
2958                     constants.RAPI_CERT_FILE,
2959                     constants.RAPI_USERS_FILE,
2960                     constants.CONFD_HMAC_KEY,
2961                     constants.CLUSTER_DOMAIN_SECRET_FILE,
2962                    ])
2963
2964   vm_files = set()
2965   enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2966   for hv_name in enabled_hypervisors:
2967     hv_class = hypervisor.GetHypervisor(hv_name)
2968     vm_files.update(hv_class.GetAncillaryFiles())
2969
2970   # 3. Perform the files upload
2971   for fname in dist_files:
2972     _UploadHelper(lu, dist_nodes, fname)
2973   for fname in vm_files:
2974     _UploadHelper(lu, vm_nodes, fname)
2975
2976
2977 class LURedistributeConfig(NoHooksLU):
2978   """Force the redistribution of cluster configuration.
2979
2980   This is a very simple LU.
2981
2982   """
2983   REQ_BGL = False
2984
2985   def ExpandNames(self):
2986     self.needed_locks = {
2987       locking.LEVEL_NODE: locking.ALL_SET,
2988     }
2989     self.share_locks[locking.LEVEL_NODE] = 1
2990
2991   def Exec(self, feedback_fn):
2992     """Redistribute the configuration.
2993
2994     """
2995     self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2996     _RedistributeAncillaryFiles(self)
2997
2998
2999 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3000   """Sleep and poll for an instance's disk to sync.
3001
3002   """
3003   if not instance.disks or disks is not None and not disks:
3004     return True
3005
3006   disks = _ExpandCheckDisks(instance, disks)
3007
3008   if not oneshot:
3009     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3010
3011   node = instance.primary_node
3012
3013   for dev in disks:
3014     lu.cfg.SetDiskID(dev, node)
3015
3016   # TODO: Convert to utils.Retry
3017
3018   retries = 0
3019   degr_retries = 10 # in seconds, as we sleep 1 second each time
3020   while True:
3021     max_time = 0
3022     done = True
3023     cumul_degraded = False
3024     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3025     msg = rstats.fail_msg
3026     if msg:
3027       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3028       retries += 1
3029       if retries >= 10:
3030         raise errors.RemoteError("Can't contact node %s for mirror data,"
3031                                  " aborting." % node)
3032       time.sleep(6)
3033       continue
3034     rstats = rstats.payload
3035     retries = 0
3036     for i, mstat in enumerate(rstats):
3037       if mstat is None:
3038         lu.LogWarning("Can't compute data for node %s/%s",
3039                            node, disks[i].iv_name)
3040         continue
3041
3042       cumul_degraded = (cumul_degraded or
3043                         (mstat.is_degraded and mstat.sync_percent is None))
3044       if mstat.sync_percent is not None:
3045         done = False
3046         if mstat.estimated_time is not None:
3047           rem_time = ("%s remaining (estimated)" %
3048                       utils.FormatSeconds(mstat.estimated_time))
3049           max_time = mstat.estimated_time
3050         else:
3051           rem_time = "no time estimate"
3052         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3053                         (disks[i].iv_name, mstat.sync_percent, rem_time))
3054
3055     # if we're done but degraded, let's do a few small retries, to
3056     # make sure we see a stable and not transient situation; therefore
3057     # we force restart of the loop
3058     if (done or oneshot) and cumul_degraded and degr_retries > 0:
3059       logging.info("Degraded disks found, %d retries left", degr_retries)
3060       degr_retries -= 1
3061       time.sleep(1)
3062       continue
3063
3064     if done or oneshot:
3065       break
3066
3067     time.sleep(min(60, max_time))
3068
3069   if done:
3070     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3071   return not cumul_degraded
3072
3073
3074 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3075   """Check that mirrors are not degraded.
3076
3077   The ldisk parameter, if True, will change the test from the
3078   is_degraded attribute (which represents overall non-ok status for
3079   the device(s)) to the ldisk (representing the local storage status).
3080
3081   """
3082   lu.cfg.SetDiskID(dev, node)
3083
3084   result = True
3085
3086   if on_primary or dev.AssembleOnSecondary():
3087     rstats = lu.rpc.call_blockdev_find(node, dev)
3088     msg = rstats.fail_msg
3089     if msg:
3090       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3091       result = False
3092     elif not rstats.payload:
3093       lu.LogWarning("Can't find disk on node %s", node)
3094       result = False
3095     else:
3096       if ldisk:
3097         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3098       else:
3099         result = result and not rstats.payload.is_degraded
3100
3101   if dev.children:
3102     for child in dev.children:
3103       result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3104
3105   return result
3106
3107
3108 class LUDiagnoseOS(NoHooksLU):
3109   """Logical unit for OS diagnose/query.
3110
3111   """
3112   _OP_PARAMS = [
3113     _POutputFields,
3114     ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
3115     ]
3116   REQ_BGL = False
3117   _HID = "hidden"
3118   _BLK = "blacklisted"
3119   _VLD = "valid"
3120   _FIELDS_STATIC = utils.FieldSet()
3121   _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
3122                                    "parameters", "api_versions", _HID, _BLK)
3123
3124   def CheckArguments(self):
3125     if self.op.names:
3126       raise errors.OpPrereqError("Selective OS query not supported",
3127                                  errors.ECODE_INVAL)
3128
3129     _CheckOutputFields(static=self._FIELDS_STATIC,
3130                        dynamic=self._FIELDS_DYNAMIC,
3131                        selected=self.op.output_fields)
3132
3133   def ExpandNames(self):
3134     # Lock all nodes, in shared mode
3135     # Temporary removal of locks, should be reverted later
3136     # TODO: reintroduce locks when they are lighter-weight
3137     self.needed_locks = {}
3138     #self.share_locks[locking.LEVEL_NODE] = 1
3139     #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3140
3141   @staticmethod
3142   def _DiagnoseByOS(rlist):
3143     """Remaps a per-node return list into an a per-os per-node dictionary
3144
3145     @param rlist: a map with node names as keys and OS objects as values
3146
3147     @rtype: dict
3148     @return: a dictionary with osnames as keys and as value another
3149         map, with nodes as keys and tuples of (path, status, diagnose,
3150         variants, parameters, api_versions) as values, eg::
3151
3152           {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3153                                      (/srv/..., False, "invalid api")],
3154                            "node2": [(/srv/..., True, "", [], [])]}
3155           }
3156
3157     """
3158     all_os = {}
3159     # we build here the list of nodes that didn't fail the RPC (at RPC
3160     # level), so that nodes with a non-responding node daemon don't
3161     # make all OSes invalid
3162     good_nodes = [node_name for node_name in rlist
3163                   if not rlist[node_name].fail_msg]
3164     for node_name, nr in rlist.items():
3165       if nr.fail_msg or not nr.payload:
3166         continue
3167       for (name, path, status, diagnose, variants,
3168            params, api_versions) in nr.payload:
3169         if name not in all_os:
3170           # build a list of nodes for this os containing empty lists
3171           # for each node in node_list
3172           all_os[name] = {}
3173           for nname in good_nodes:
3174             all_os[name][nname] = []
3175         # convert params from [name, help] to (name, help)
3176         params = [tuple(v) for v in params]
3177         all_os[name][node_name].append((path, status, diagnose,
3178                                         variants, params, api_versions))
3179     return all_os
3180
3181   def Exec(self, feedback_fn):
3182     """Compute the list of OSes.
3183
3184     """
3185     valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
3186     node_data = self.rpc.call_os_diagnose(valid_nodes)
3187     pol = self._DiagnoseByOS(node_data)
3188     output = []
3189     cluster = self.cfg.GetClusterInfo()
3190
3191     for os_name in utils.NiceSort(pol.keys()):
3192       os_data = pol[os_name]
3193       row = []
3194       valid = True
3195       (variants, params, api_versions) = null_state = (set(), set(), set())
3196       for idx, osl in enumerate(os_data.values()):
3197         valid = bool(valid and osl and osl[0][1])
3198         if not valid:
3199           (variants, params, api_versions) = null_state
3200           break
3201         node_variants, node_params, node_api = osl[0][3:6]
3202         if idx == 0: # first entry
3203           variants = set(node_variants)
3204           params = set(node_params)
3205           api_versions = set(node_api)
3206         else: # keep consistency
3207           variants.intersection_update(node_variants)
3208           params.intersection_update(node_params)
3209           api_versions.intersection_update(node_api)
3210
3211       is_hid = os_name in cluster.hidden_os
3212       is_blk = os_name in cluster.blacklisted_os
3213       if ((self._HID not in self.op.output_fields and is_hid) or
3214           (self._BLK not in self.op.output_fields and is_blk) or
3215           (self._VLD not in self.op.output_fields and not valid)):
3216         continue
3217
3218       for field in self.op.output_fields:
3219         if field == "name":
3220           val = os_name
3221         elif field == self._VLD:
3222           val = valid
3223         elif field == "node_status":
3224           # this is just a copy of the dict
3225           val = {}
3226           for node_name, nos_list in os_data.items():
3227             val[node_name] = nos_list
3228         elif field == "variants":
3229           val = utils.NiceSort(list(variants))
3230         elif field == "parameters":
3231           val = list(params)
3232         elif field == "api_versions":
3233           val = list(api_versions)
3234         elif field == self._HID:
3235           val = is_hid
3236         elif field == self._BLK:
3237           val = is_blk
3238         else:
3239           raise errors.ParameterError(field)
3240         row.append(val)
3241       output.append(row)
3242
3243     return output
3244
3245
3246 class LURemoveNode(LogicalUnit):
3247   """Logical unit for removing a node.
3248
3249   """
3250   HPATH = "node-remove"
3251   HTYPE = constants.HTYPE_NODE
3252   _OP_PARAMS = [
3253     _PNodeName,
3254     ]
3255
3256   def BuildHooksEnv(self):
3257     """Build hooks env.
3258
3259     This doesn't run on the target node in the pre phase as a failed
3260     node would then be impossible to remove.
3261
3262     """
3263     env = {
3264       "OP_TARGET": self.op.node_name,
3265       "NODE_NAME": self.op.node_name,
3266       }
3267     all_nodes = self.cfg.GetNodeList()
3268     try:
3269       all_nodes.remove(self.op.node_name)
3270     except ValueError:
3271       logging.warning("Node %s which is about to be removed not found"
3272                       " in the all nodes list", self.op.node_name)
3273     return env, all_nodes, all_nodes
3274
3275   def CheckPrereq(self):
3276     """Check prerequisites.
3277
3278     This checks:
3279      - the node exists in the configuration
3280      - it does not have primary or secondary instances
3281      - it's not the master
3282
3283     Any errors are signaled by raising errors.OpPrereqError.
3284
3285     """
3286     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3287     node = self.cfg.GetNodeInfo(self.op.node_name)
3288     assert node is not None
3289
3290     instance_list = self.cfg.GetInstanceList()
3291
3292     masternode = self.cfg.GetMasterNode()
3293     if node.name == masternode:
3294       raise errors.OpPrereqError("Node is the master node,"
3295                                  " you need to failover first.",
3296                                  errors.ECODE_INVAL)
3297
3298     for instance_name in instance_list:
3299       instance = self.cfg.GetInstanceInfo(instance_name)
3300       if node.name in instance.all_nodes:
3301         raise errors.OpPrereqError("Instance %s is still running on the node,"
3302                                    " please remove first." % instance_name,
3303                                    errors.ECODE_INVAL)
3304     self.op.node_name = node.name
3305     self.node = node
3306
3307   def Exec(self, feedback_fn):
3308     """Removes the node from the cluster.
3309
3310     """
3311     node = self.node
3312     logging.info("Stopping the node daemon and removing configs from node %s",
3313                  node.name)
3314
3315     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3316
3317     # Promote nodes to master candidate as needed
3318     _AdjustCandidatePool(self, exceptions=[node.name])
3319     self.context.RemoveNode(node.name)
3320
3321     # Run post hooks on the node before it's removed
3322     hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3323     try:
3324       hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3325     except:
3326       # pylint: disable-msg=W0702
3327       self.LogWarning("Errors occurred running hooks on %s" % node.name)
3328
3329     result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3330     msg = result.fail_msg
3331     if msg:
3332       self.LogWarning("Errors encountered on the remote node while leaving"
3333                       " the cluster: %s", msg)
3334
3335     # Remove node from our /etc/hosts
3336     if self.cfg.GetClusterInfo().modify_etc_hosts:
3337       master_node = self.cfg.GetMasterNode()
3338       result = self.rpc.call_etc_hosts_modify(master_node,
3339                                               constants.ETC_HOSTS_REMOVE,
3340                                               node.name, None)
3341       result.Raise("Can't update hosts file with new host data")
3342       _RedistributeAncillaryFiles(self)
3343
3344
3345 class LUQueryNodes(NoHooksLU):
3346   """Logical unit for querying nodes.
3347
3348   """
3349   # pylint: disable-msg=W0142
3350   _OP_PARAMS = [
3351     _POutputFields,
3352     ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
3353     ("use_locking", False, ht.TBool),
3354     ]
3355   REQ_BGL = False
3356
3357   _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
3358                     "master_candidate", "offline", "drained",
3359                     "master_capable", "vm_capable"]
3360
3361   _FIELDS_DYNAMIC = utils.FieldSet(
3362     "dtotal", "dfree",
3363     "mtotal", "mnode", "mfree",
3364     "bootid",
3365     "ctotal", "cnodes", "csockets",
3366     )
3367
3368   _FIELDS_STATIC = utils.FieldSet(*[
3369     "pinst_cnt", "sinst_cnt",
3370     "pinst_list", "sinst_list",
3371     "pip", "sip", "tags",
3372     "master", "role",
3373     "group.uuid", "group",
3374     ] + _SIMPLE_FIELDS
3375     )
3376
3377   def CheckArguments(self):
3378     _CheckOutputFields(static=self._FIELDS_STATIC,
3379                        dynamic=self._FIELDS_DYNAMIC,
3380                        selected=self.op.output_fields)
3381
3382   def ExpandNames(self):
3383     self.needed_locks = {}
3384     self.share_locks[locking.LEVEL_NODE] = 1
3385
3386     if self.op.names:
3387       self.wanted = _GetWantedNodes(self, self.op.names)
3388     else:
3389       self.wanted = locking.ALL_SET
3390
3391     self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3392     self.do_locking = self.do_node_query and self.op.use_locking
3393     if self.do_locking:
3394       # if we don't request only static fields, we need to lock the nodes
3395       self.needed_locks[locking.LEVEL_NODE] = self.wanted
3396
3397   def Exec(self, feedback_fn):
3398     """Computes the list of nodes and their attributes.
3399
3400     """
3401     all_info = self.cfg.GetAllNodesInfo()
3402     if self.do_locking:
3403       nodenames = self.acquired_locks[locking.LEVEL_NODE]
3404     elif self.wanted != locking.ALL_SET:
3405       nodenames = self.wanted
3406       missing = set(nodenames).difference(all_info.keys())
3407       if missing:
3408         raise errors.OpExecError(
3409           "Some nodes were removed before retrieving their data: %s" % missing)
3410     else:
3411       nodenames = all_info.keys()
3412
3413     nodenames = utils.NiceSort(nodenames)
3414     nodelist = [all_info[name] for name in nodenames]
3415
3416     if "group" in self.op.output_fields:
3417       groups = self.cfg.GetAllNodeGroupsInfo()
3418     else:
3419       groups = {}
3420
3421     # begin data gathering
3422
3423     if self.do_node_query:
3424       live_data = {}
3425       node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
3426                                           self.cfg.GetHypervisorType())
3427       for name in nodenames:
3428         nodeinfo = node_data[name]
3429         if not nodeinfo.fail_msg and nodeinfo.payload:
3430           nodeinfo = nodeinfo.payload
3431           fn = utils.TryConvert
3432           live_data[name] = {
3433             "mtotal": fn(int, nodeinfo.get('memory_total', None)),
3434             "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
3435             "mfree": fn(int, nodeinfo.get('memory_free', None)),
3436             "dtotal": fn(int, nodeinfo.get('vg_size', None)),
3437             "dfree": fn(int, nodeinfo.get('vg_free', None)),
3438             "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
3439             "bootid": nodeinfo.get('bootid', None),
3440             "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
3441             "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
3442             }
3443         else:
3444           live_data[name] = {}
3445     else:
3446       live_data = dict.fromkeys(nodenames, {})
3447
3448     node_to_primary = dict([(name, set()) for name in nodenames])
3449     node_to_secondary = dict([(name, set()) for name in nodenames])
3450
3451     inst_fields = frozenset(("pinst_cnt", "pinst_list",
3452                              "sinst_cnt", "sinst_list"))
3453     if inst_fields & frozenset(self.op.output_fields):
3454       inst_data = self.cfg.GetAllInstancesInfo()
3455
3456       for inst in inst_data.values():
3457         if inst.primary_node in node_to_primary:
3458           node_to_primary[inst.primary_node].add(inst.name)
3459         for secnode in inst.secondary_nodes:
3460           if secnode in node_to_secondary:
3461             node_to_secondary[secnode].add(inst.name)
3462
3463     master_node = self.cfg.GetMasterNode()
3464
3465     # end data gathering
3466
3467     output = []
3468     for node in nodelist:
3469       node_output = []
3470       for field in self.op.output_fields:
3471         if field in self._SIMPLE_FIELDS:
3472           val = getattr(node, field)
3473         elif field == "pinst_list":
3474           val = list(node_to_primary[node.name])
3475         elif field == "sinst_list":
3476           val = list(node_to_secondary[node.name])
3477         elif field == "pinst_cnt":
3478           val = len(node_to_primary[node.name])
3479         elif field == "sinst_cnt":
3480           val = len(node_to_secondary[node.name])
3481         elif field == "pip":
3482           val = node.primary_ip
3483         elif field == "sip":
3484           val = node.secondary_ip
3485         elif field == "tags":
3486           val = list(node.GetTags())
3487         elif field == "master":
3488           val = node.name == master_node
3489         elif self._FIELDS_DYNAMIC.Matches(field):
3490           val = live_data[node.name].get(field, None)
3491         elif field == "role":
3492           if node.name == master_node:
3493             val = "M"
3494           elif node.master_candidate:
3495             val = "C"
3496           elif node.drained:
3497             val = "D"
3498           elif node.offline:
3499             val = "O"
3500           else:
3501             val = "R"
3502         elif field == "group.uuid":
3503           val = node.group
3504         elif field == "group":
3505           ng = groups.get(node.group, None)
3506           if ng is None:
3507             val = "<unknown>"
3508           else:
3509             val = ng.name
3510         else:
3511           raise errors.ParameterError(field)
3512         node_output.append(val)
3513       output.append(node_output)
3514
3515     return output
3516
3517
3518 class LUQueryNodeVolumes(NoHooksLU):
3519   """Logical unit for getting volumes on node(s).
3520
3521   """
3522   _OP_PARAMS = [
3523     _POutputFields,
3524     ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
3525     ]
3526   REQ_BGL = False
3527   _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3528   _FIELDS_STATIC = utils.FieldSet("node")
3529
3530   def CheckArguments(self):
3531     _CheckOutputFields(static=self._FIELDS_STATIC,
3532                        dynamic=self._FIELDS_DYNAMIC,
3533                        selected=self.op.output_fields)
3534
3535   def ExpandNames(self):
3536     self.needed_locks = {}
3537     self.share_locks[locking.LEVEL_NODE] = 1
3538     if not self.op.nodes:
3539       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3540     else:
3541       self.needed_locks[locking.LEVEL_NODE] = \
3542         _GetWantedNodes(self, self.op.nodes)
3543
3544   def Exec(self, feedback_fn):
3545     """Computes the list of nodes and their attributes.
3546
3547     """
3548     nodenames = self.acquired_locks[locking.LEVEL_NODE]
3549     volumes = self.rpc.call_node_volumes(nodenames)
3550
3551     ilist = [self.cfg.GetInstanceInfo(iname) for iname
3552              in self.cfg.GetInstanceList()]
3553
3554     lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3555
3556     output = []
3557     for node in nodenames:
3558       nresult = volumes[node]
3559       if nresult.offline:
3560         continue
3561       msg = nresult.fail_msg
3562       if msg:
3563         self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3564         continue
3565
3566       node_vols = nresult.payload[:]
3567       node_vols.sort(key=lambda vol: vol['dev'])
3568
3569       for vol in node_vols:
3570         node_output = []
3571         for field in self.op.output_fields:
3572           if field == "node":
3573             val = node
3574           elif field == "phys":
3575             val = vol['dev']
3576           elif field == "vg":
3577             val = vol['vg']
3578           elif field == "name":
3579             val = vol['name']
3580           elif field == "size":
3581             val = int(float(vol['size']))
3582           elif field == "instance":
3583             for inst in ilist:
3584               if node not in lv_by_node[inst]:
3585                 continue
3586               if vol['name'] in lv_by_node[inst][node]:
3587                 val = inst.name
3588                 break
3589             else:
3590               val = '-'
3591           else:
3592             raise errors.ParameterError(field)
3593           node_output.append(str(val))
3594
3595         output.append(node_output)
3596
3597     return output
3598
3599
3600 class LUQueryNodeStorage(NoHooksLU):
3601   """Logical unit for getting information on storage units on node(s).
3602
3603   """
3604   _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3605   _OP_PARAMS = [
3606     _POutputFields,
3607     ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
3608     ("storage_type", ht.NoDefault, _CheckStorageType),
3609     ("name", None, ht.TMaybeString),
3610     ]
3611   REQ_BGL = False
3612
3613   def CheckArguments(self):
3614     _CheckOutputFields(static=self._FIELDS_STATIC,
3615                        dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3616                        selected=self.op.output_fields)
3617
3618   def ExpandNames(self):
3619     self.needed_locks = {}
3620     self.share_locks[locking.LEVEL_NODE] = 1
3621
3622     if self.op.nodes:
3623       self.needed_locks[locking.LEVEL_NODE] = \
3624         _GetWantedNodes(self, self.op.nodes)
3625     else:
3626       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3627
3628   def Exec(self, feedback_fn):
3629     """Computes the list of nodes and their attributes.
3630
3631     """
3632     self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3633
3634     # Always get name to sort by
3635     if constants.SF_NAME in self.op.output_fields:
3636       fields = self.op.output_fields[:]
3637     else:
3638       fields = [constants.SF_NAME] + self.op.output_fields
3639
3640     # Never ask for node or type as it's only known to the LU
3641     for extra in [constants.SF_NODE, constants.SF_TYPE]:
3642       while extra in fields:
3643         fields.remove(extra)
3644
3645     field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3646     name_idx = field_idx[constants.SF_NAME]
3647
3648     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3649     data = self.rpc.call_storage_list(self.nodes,
3650                                       self.op.storage_type, st_args,
3651                                       self.op.name, fields)
3652
3653     result = []
3654
3655     for node in utils.NiceSort(self.nodes):
3656       nresult = data[node]
3657       if nresult.offline:
3658         continue
3659
3660       msg = nresult.fail_msg
3661       if msg:
3662         self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3663         continue
3664
3665       rows = dict([(row[name_idx], row) for row in nresult.payload])
3666
3667       for name in utils.NiceSort(rows.keys()):
3668         row = rows[name]
3669
3670         out = []
3671
3672         for field in self.op.output_fields:
3673           if field == constants.SF_NODE:
3674             val = node
3675           elif field == constants.SF_TYPE:
3676             val = self.op.storage_type
3677           elif field in field_idx:
3678             val = row[field_idx[field]]
3679           else:
3680             raise errors.ParameterError(field)
3681
3682           out.append(val)
3683
3684         result.append(out)
3685
3686     return result
3687
3688
3689 class LUModifyNodeStorage(NoHooksLU):
3690   """Logical unit for modifying a storage volume on a node.
3691
3692   """
3693   _OP_PARAMS = [
3694     _PNodeName,
3695     ("storage_type", ht.NoDefault, _CheckStorageType),
3696     ("name", ht.NoDefault, ht.TNonEmptyString),
3697     ("changes", ht.NoDefault, ht.TDict),
3698     ]
3699   REQ_BGL = False
3700
3701   def CheckArguments(self):
3702     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3703
3704     storage_type = self.op.storage_type
3705
3706     try:
3707       modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3708     except KeyError:
3709       raise errors.OpPrereqError("Storage units of type '%s' can not be"
3710                                  " modified" % storage_type,
3711                                  errors.ECODE_INVAL)
3712
3713     diff = set(self.op.changes.keys()) - modifiable
3714     if diff:
3715       raise errors.OpPrereqError("The following fields can not be modified for"
3716                                  " storage units of type '%s': %r" %
3717                                  (storage_type, list(diff)),
3718                                  errors.ECODE_INVAL)
3719
3720   def ExpandNames(self):
3721     self.needed_locks = {
3722       locking.LEVEL_NODE: self.op.node_name,
3723       }
3724
3725   def Exec(self, feedback_fn):
3726     """Computes the list of nodes and their attributes.
3727
3728     """
3729     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3730     result = self.rpc.call_storage_modify(self.op.node_name,
3731                                           self.op.storage_type, st_args,
3732                                           self.op.name, self.op.changes)
3733     result.Raise("Failed to modify storage unit '%s' on %s" %
3734                  (self.op.name, self.op.node_name))
3735
3736
3737 class LUAddNode(LogicalUnit):
3738   """Logical unit for adding node to the cluster.
3739
3740   """
3741   HPATH = "node-add"
3742   HTYPE = constants.HTYPE_NODE
3743   _OP_PARAMS = [
3744     _PNodeName,
3745     ("primary_ip", None, ht.NoType),
3746     ("secondary_ip", None, ht.TMaybeString),
3747     ("readd", False, ht.TBool),
3748     ("group", None, ht.TMaybeString),
3749     ("master_capable", None, ht.TMaybeBool),
3750     ("vm_capable", None, ht.TMaybeBool),
3751     ("ndparams", None, ht.TOr(ht.TDict, ht.TNone)),
3752     ]
3753   _NFLAGS = ["master_capable", "vm_capable"]
3754
3755   def CheckArguments(self):
3756     self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
3757     # validate/normalize the node name
3758     self.hostname = netutils.GetHostname(name=self.op.node_name,
3759                                          family=self.primary_ip_family)
3760     self.op.node_name = self.hostname.name
3761     if self.op.readd and self.op.group:
3762       raise errors.OpPrereqError("Cannot pass a node group when a node is"
3763                                  " being readded", errors.ECODE_INVAL)
3764
3765   def BuildHooksEnv(self):
3766     """Build hooks env.
3767
3768     This will run on all nodes before, and on all nodes + the new node after.
3769
3770     """
3771     env = {
3772       "OP_TARGET": self.op.node_name,
3773       "NODE_NAME": self.op.node_name,
3774       "NODE_PIP": self.op.primary_ip,
3775       "NODE_SIP": self.op.secondary_ip,
3776       "MASTER_CAPABLE": str(self.op.master_capable),
3777       "VM_CAPABLE": str(self.op.vm_capable),
3778       }
3779     nodes_0 = self.cfg.GetNodeList()
3780     nodes_1 = nodes_0 + [self.op.node_name, ]
3781     return env, nodes_0, nodes_1
3782
3783   def CheckPrereq(self):
3784     """Check prerequisites.
3785
3786     This checks:
3787      - the new node is not already in the config
3788      - it is resolvable
3789      - its parameters (single/dual homed) matches the cluster
3790
3791     Any errors are signaled by raising errors.OpPrereqError.
3792
3793     """
3794     cfg = self.cfg
3795     hostname = self.hostname
3796     node = hostname.name
3797     primary_ip = self.op.primary_ip = hostname.ip
3798     if self.op.secondary_ip is None:
3799       if self.primary_ip_family == netutils.IP6Address.family:
3800         raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
3801                                    " IPv4 address must be given as secondary",
3802                                    errors.ECODE_INVAL)
3803       self.op.secondary_ip = primary_ip
3804
3805     secondary_ip = self.op.secondary_ip
3806     if not netutils.IP4Address.IsValid(secondary_ip):
3807       raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
3808                                  " address" % secondary_ip, errors.ECODE_INVAL)
3809
3810     node_list = cfg.GetNodeList()
3811     if not self.op.readd and node in node_list:
3812       raise errors.OpPrereqError("Node %s is already in the configuration" %
3813                                  node, errors.ECODE_EXISTS)
3814     elif self.op.readd and node not in node_list:
3815       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3816                                  errors.ECODE_NOENT)
3817
3818     self.changed_primary_ip = False
3819
3820     for existing_node_name in node_list:
3821       existing_node = cfg.GetNodeInfo(existing_node_name)
3822
3823       if self.op.readd and node == existing_node_name:
3824         if existing_node.secondary_ip != secondary_ip:
3825           raise errors.OpPrereqError("Readded node doesn't have the same IP"
3826                                      " address configuration as before",
3827                                      errors.ECODE_INVAL)
3828         if existing_node.primary_ip != primary_ip:
3829           self.changed_primary_ip = True
3830
3831         continue
3832
3833       if (existing_node.primary_ip == primary_ip or
3834           existing_node.secondary_ip == primary_ip or
3835           existing_node.primary_ip == secondary_ip or
3836           existing_node.secondary_ip == secondary_ip):
3837         raise errors.OpPrereqError("New node ip address(es) conflict with"
3838                                    " existing node %s" % existing_node.name,
3839                                    errors.ECODE_NOTUNIQUE)
3840
3841     # After this 'if' block, None is no longer a valid value for the
3842     # _capable op attributes
3843     if self.op.readd:
3844       old_node = self.cfg.GetNodeInfo(node)
3845       assert old_node is not None, "Can't retrieve locked node %s" % node
3846       for attr in self._NFLAGS:
3847         if getattr(self.op, attr) is None:
3848           setattr(self.op, attr, getattr(old_node, attr))
3849     else:
3850       for attr in self._NFLAGS:
3851         if getattr(self.op, attr) is None:
3852           setattr(self.op, attr, True)
3853
3854     if self.op.readd and not self.op.vm_capable:
3855       pri, sec = cfg.GetNodeInstances(node)
3856       if pri or sec:
3857         raise errors.OpPrereqError("Node %s being re-added with vm_capable"
3858                                    " flag set to false, but it already holds"
3859                                    " instances" % node,
3860                                    errors.ECODE_STATE)
3861
3862     # check that the type of the node (single versus dual homed) is the
3863     # same as for the master
3864     myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3865     master_singlehomed = myself.secondary_ip == myself.primary_ip
3866     newbie_singlehomed = secondary_ip == primary_ip
3867     if master_singlehomed != newbie_singlehomed:
3868       if master_singlehomed:
3869         raise errors.OpPrereqError("The master has no secondary ip but the"
3870                                    " new node has one",
3871                                    errors.ECODE_INVAL)
3872       else:
3873         raise errors.OpPrereqError("The master has a secondary ip but the"
3874                                    " new node doesn't have one",
3875                                    errors.ECODE_INVAL)
3876
3877     # checks reachability
3878     if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3879       raise errors.OpPrereqError("Node not reachable by ping",
3880                                  errors.ECODE_ENVIRON)
3881
3882     if not newbie_singlehomed:
3883       # check reachability from my secondary ip to newbie's secondary ip
3884       if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3885                            source=myself.secondary_ip):
3886         raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3887                                    " based ping to node daemon port",
3888                                    errors.ECODE_ENVIRON)
3889
3890     if self.op.readd:
3891       exceptions = [node]
3892     else:
3893       exceptions = []
3894
3895     if self.op.master_capable:
3896       self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3897     else:
3898       self.master_candidate = False
3899
3900     if self.op.readd:
3901       self.new_node = old_node
3902     else:
3903       node_group = cfg.LookupNodeGroup(self.op.group)
3904       self.new_node = objects.Node(name=node,
3905                                    primary_ip=primary_ip,
3906                                    secondary_ip=secondary_ip,
3907                                    master_candidate=self.master_candidate,
3908                                    offline=False, drained=False,
3909                                    group=node_group)
3910
3911     if self.op.ndparams:
3912       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3913
3914   def Exec(self, feedback_fn):
3915     """Adds the new node to the cluster.
3916
3917     """
3918     new_node = self.new_node
3919     node = new_node.name
3920
3921     # for re-adds, reset the offline/drained/master-candidate flags;
3922     # we need to reset here, otherwise offline would prevent RPC calls
3923     # later in the procedure; this also means that if the re-add
3924     # fails, we are left with a non-offlined, broken node
3925     if self.op.readd:
3926       new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3927       self.LogInfo("Readding a node, the offline/drained flags were reset")
3928       # if we demote the node, we do cleanup later in the procedure
3929       new_node.master_candidate = self.master_candidate
3930       if self.changed_primary_ip:
3931         new_node.primary_ip = self.op.primary_ip
3932
3933     # copy the master/vm_capable flags
3934     for attr in self._NFLAGS:
3935       setattr(new_node, attr, getattr(self.op, attr))
3936
3937     # notify the user about any possible mc promotion
3938     if new_node.master_candidate:
3939       self.LogInfo("Node will be a master candidate")
3940
3941     if self.op.ndparams:
3942       new_node.ndparams = self.op.ndparams
3943
3944     # check connectivity
3945     result = self.rpc.call_version([node])[node]
3946     result.Raise("Can't get version information from node %s" % node)
3947     if constants.PROTOCOL_VERSION == result.payload:
3948       logging.info("Communication to node %s fine, sw version %s match",
3949                    node, result.payload)
3950     else:
3951       raise errors.OpExecError("Version mismatch master version %s,"
3952                                " node version %s" %
3953                                (constants.PROTOCOL_VERSION, result.payload))
3954
3955     # Add node to our /etc/hosts, and add key to known_hosts
3956     if self.cfg.GetClusterInfo().modify_etc_hosts:
3957       master_node = self.cfg.GetMasterNode()
3958       result = self.rpc.call_etc_hosts_modify(master_node,
3959                                               constants.ETC_HOSTS_ADD,
3960                                               self.hostname.name,
3961                                               self.hostname.ip)
3962       result.Raise("Can't update hosts file with new host data")
3963
3964     if new_node.secondary_ip != new_node.primary_ip:
3965       _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
3966                                False)
3967
3968     node_verify_list = [self.cfg.GetMasterNode()]
3969     node_verify_param = {
3970       constants.NV_NODELIST: [node],
3971       # TODO: do a node-net-test as well?
3972     }
3973
3974     result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3975                                        self.cfg.GetClusterName())
3976     for verifier in node_verify_list:
3977       result[verifier].Raise("Cannot communicate with node %s" % verifier)
3978       nl_payload = result[verifier].payload[constants.NV_NODELIST]
3979       if nl_payload:
3980         for failed in nl_payload:
3981           feedback_fn("ssh/hostname verification failed"
3982                       " (checking from %s): %s" %
3983                       (verifier, nl_payload[failed]))
3984         raise errors.OpExecError("ssh/hostname verification failed.")
3985
3986     if self.op.readd:
3987       _RedistributeAncillaryFiles(self)
3988       self.context.ReaddNode(new_node)
3989       # make sure we redistribute the config
3990       self.cfg.Update(new_node, feedback_fn)
3991       # and make sure the new node will not have old files around
3992       if not new_node.master_candidate:
3993         result = self.rpc.call_node_demote_from_mc(new_node.name)
3994         msg = result.fail_msg
3995         if msg:
3996           self.LogWarning("Node failed to demote itself from master"
3997                           " candidate status: %s" % msg)
3998     else:
3999       _RedistributeAncillaryFiles(self, additional_nodes=[node],
4000                                   additional_vm=self.op.vm_capable)
4001       self.context.AddNode(new_node, self.proc.GetECId())
4002
4003
4004 class LUSetNodeParams(LogicalUnit):
4005   """Modifies the parameters of a node.
4006
4007   @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4008       to the node role (as _ROLE_*)
4009   @cvar _R2F: a dictionary from node role to tuples of flags
4010   @cvar _FLAGS: a list of attribute names corresponding to the flags
4011
4012   """
4013   HPATH = "node-modify"
4014   HTYPE = constants.HTYPE_NODE
4015   _OP_PARAMS = [
4016     _PNodeName,
4017     ("master_candidate", None, ht.TMaybeBool),
4018     ("offline", None, ht.TMaybeBool),
4019     ("drained", None, ht.TMaybeBool),
4020     ("auto_promote", False, ht.TBool),
4021     ("master_capable", None, ht.TMaybeBool),
4022     ("vm_capable", None, ht.TMaybeBool),
4023     ("secondary_ip", None, ht.TMaybeString),
4024     ("ndparams", None, ht.TOr(ht.TDict, ht.TNone)),
4025     _PForce,
4026     ]
4027   REQ_BGL = False
4028   (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
4029   _F2R = {
4030     (True, False, False): _ROLE_CANDIDATE,
4031     (False, True, False): _ROLE_DRAINED,
4032     (False, False, True): _ROLE_OFFLINE,
4033     (False, False, False): _ROLE_REGULAR,
4034     }
4035   _R2F = dict((v, k) for k, v in _F2R.items())
4036   _FLAGS = ["master_candidate", "drained", "offline"]
4037
4038   def CheckArguments(self):
4039     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4040     all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4041                 self.op.master_capable, self.op.vm_capable,
4042                 self.op.secondary_ip]
4043     if all_mods.count(None) == len(all_mods):
4044       raise errors.OpPrereqError("Please pass at least one modification",
4045                                  errors.ECODE_INVAL)
4046     if all_mods.count(True) > 1:
4047       raise errors.OpPrereqError("Can't set the node into more than one"
4048                                  " state at the same time",
4049                                  errors.ECODE_INVAL)
4050
4051     # Boolean value that tells us whether we might be demoting from MC
4052     self.might_demote = (self.op.master_candidate == False or
4053                          self.op.offline == True or
4054                          self.op.drained == True or
4055                          self.op.master_capable == False)
4056
4057     if self.op.secondary_ip:
4058       if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4059         raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4060                                    " address" % self.op.secondary_ip,
4061                                    errors.ECODE_INVAL)
4062
4063     self.lock_all = self.op.auto_promote and self.might_demote
4064     self.lock_instances = self.op.secondary_ip is not None
4065
4066   def ExpandNames(self):
4067     if self.lock_all:
4068       self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4069     else:
4070       self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4071
4072     if self.lock_instances:
4073       self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4074
4075   def DeclareLocks(self, level):
4076     # If we have locked all instances, before waiting to lock nodes, release
4077     # all the ones living on nodes unrelated to the current operation.
4078     if level == locking.LEVEL_NODE and self.lock_instances:
4079       instances_release = []
4080       instances_keep = []
4081       self.affected_instances = []
4082       if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4083         for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
4084           instance = self.context.cfg.GetInstanceInfo(instance_name)
4085           i_mirrored = instance.disk_template in constants.DTS_NET_MIRROR
4086           if i_mirrored and self.op.node_name in instance.all_nodes:
4087             instances_keep.append(instance_name)
4088             self.affected_instances.append(instance)
4089           else:
4090             instances_release.append(instance_name)
4091         if instances_release:
4092           self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
4093           self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep
4094
4095   def BuildHooksEnv(self):
4096     """Build hooks env.
4097
4098     This runs on the master node.
4099
4100     """
4101     env = {
4102       "OP_TARGET": self.op.node_name,
4103       "MASTER_CANDIDATE": str(self.op.master_candidate),
4104       "OFFLINE": str(self.op.offline),
4105       "DRAINED": str(self.op.drained),
4106       "MASTER_CAPABLE": str(self.op.master_capable),
4107       "VM_CAPABLE": str(self.op.vm_capable),
4108       }
4109     nl = [self.cfg.GetMasterNode(),
4110           self.op.node_name]
4111     return env, nl, nl
4112
4113   def CheckPrereq(self):
4114     """Check prerequisites.
4115
4116     This only checks the instance list against the existing names.
4117
4118     """
4119     node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4120
4121     if (self.op.master_candidate is not None or
4122         self.op.drained is not None or
4123         self.op.offline is not None):
4124       # we can't change the master's node flags
4125       if self.op.node_name == self.cfg.GetMasterNode():
4126         raise errors.OpPrereqError("The master role can be changed"
4127                                    " only via master-failover",
4128                                    errors.ECODE_INVAL)
4129
4130     if self.op.master_candidate and not node.master_capable:
4131       raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4132                                  " it a master candidate" % node.name,
4133                                  errors.ECODE_STATE)
4134
4135     if self.op.vm_capable == False:
4136       (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4137       if ipri or isec:
4138         raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4139                                    " the vm_capable flag" % node.name,
4140                                    errors.ECODE_STATE)
4141
4142     if node.master_candidate and self.might_demote and not self.lock_all:
4143       assert not self.op.auto_promote, "auto-promote set but lock_all not"
4144       # check if after removing the current node, we're missing master
4145       # candidates
4146       (mc_remaining, mc_should, _) = \
4147           self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4148       if mc_remaining < mc_should:
4149         raise errors.OpPrereqError("Not enough master candidates, please"
4150                                    " pass auto_promote to allow promotion",
4151                                    errors.ECODE_STATE)
4152
4153     self.old_flags = old_flags = (node.master_candidate,
4154                                   node.drained, node.offline)
4155     assert old_flags in self._F2R, "Un-handled old flags  %s" % str(old_flags)
4156     self.old_role = old_role = self._F2R[old_flags]
4157
4158     # Check for ineffective changes
4159     for attr in self._FLAGS:
4160       if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4161         self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4162         setattr(self.op, attr, None)
4163
4164     # Past this point, any flag change to False means a transition
4165     # away from the respective state, as only real changes are kept
4166
4167     # If we're being deofflined/drained, we'll MC ourself if needed
4168     if (self.op.drained == False or self.op.offline == False or
4169         (self.op.master_capable and not node.master_capable)):
4170       if _DecideSelfPromotion(self):
4171         self.op.master_candidate = True
4172         self.LogInfo("Auto-promoting node to master candidate")
4173
4174     # If we're no longer master capable, we'll demote ourselves from MC
4175     if self.op.master_capable == False and node.master_candidate:
4176       self.LogInfo("Demoting from master candidate")
4177       self.op.master_candidate = False
4178
4179     # Compute new role
4180     assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
4181     if self.op.master_candidate:
4182       new_role = self._ROLE_CANDIDATE
4183     elif self.op.drained:
4184       new_role = self._ROLE_DRAINED
4185     elif self.op.offline:
4186       new_role = self._ROLE_OFFLINE
4187     elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
4188       # False is still in new flags, which means we're un-setting (the
4189       # only) True flag
4190       new_role = self._ROLE_REGULAR
4191     else: # no new flags, nothing, keep old role
4192       new_role = old_role
4193
4194     self.new_role = new_role
4195
4196     if old_role == self._ROLE_OFFLINE and new_role != old_role:
4197       # Trying to transition out of offline status
4198       result = self.rpc.call_version([node.name])[node.name]
4199       if result.fail_msg:
4200         raise errors.OpPrereqError("Node %s is being de-offlined but fails"
4201                                    " to report its version: %s" %
4202                                    (node.name, result.fail_msg),
4203                                    errors.ECODE_STATE)
4204       else:
4205         self.LogWarning("Transitioning node from offline to online state"
4206                         " without using re-add. Please make sure the node"
4207                         " is healthy!")
4208
4209     if self.op.secondary_ip:
4210       # Ok even without locking, because this can't be changed by any LU
4211       master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
4212       master_singlehomed = master.secondary_ip == master.primary_ip
4213       if master_singlehomed and self.op.secondary_ip:
4214         raise errors.OpPrereqError("Cannot change the secondary ip on a single"
4215                                    " homed cluster", errors.ECODE_INVAL)
4216
4217       if node.offline:
4218         if self.affected_instances:
4219           raise errors.OpPrereqError("Cannot change secondary ip: offline"
4220                                      " node has instances (%s) configured"
4221                                      " to use it" % self.affected_instances)
4222       else:
4223         # On online nodes, check that no instances are running, and that
4224         # the node has the new ip and we can reach it.
4225         for instance in self.affected_instances:
4226           _CheckInstanceDown(self, instance, "cannot change secondary ip")
4227
4228         _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
4229         if master.name != node.name:
4230           # check reachability from master secondary ip to new secondary ip
4231           if not netutils.TcpPing(self.op.secondary_ip,
4232                                   constants.DEFAULT_NODED_PORT,
4233                                   source=master.secondary_ip):
4234             raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4235                                        " based ping to node daemon port",
4236                                        errors.ECODE_ENVIRON)
4237
4238     if self.op.ndparams:
4239       new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
4240       utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
4241       self.new_ndparams = new_ndparams
4242
4243   def Exec(self, feedback_fn):
4244     """Modifies a node.
4245
4246     """
4247     node = self.node
4248     old_role = self.old_role
4249     new_role = self.new_role
4250
4251     result = []
4252
4253     if self.op.ndparams:
4254       node.ndparams = self.new_ndparams
4255
4256     for attr in ["master_capable", "vm_capable"]:
4257       val = getattr(self.op, attr)
4258       if val is not None:
4259         setattr(node, attr, val)
4260         result.append((attr, str(val)))
4261
4262     if new_role != old_role:
4263       # Tell the node to demote itself, if no longer MC and not offline
4264       if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
4265         msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
4266         if msg:
4267           self.LogWarning("Node failed to demote itself: %s", msg)
4268
4269       new_flags = self._R2F[new_role]
4270       for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
4271         if of != nf:
4272           result.append((desc, str(nf)))
4273       (node.master_candidate, node.drained, node.offline) = new_flags
4274
4275       # we locked all nodes, we adjust the CP before updating this node
4276       if self.lock_all:
4277         _AdjustCandidatePool(self, [node.name])
4278
4279     if self.op.secondary_ip:
4280       node.secondary_ip = self.op.secondary_ip
4281       result.append(("secondary_ip", self.op.secondary_ip))
4282
4283     # this will trigger configuration file update, if needed
4284     self.cfg.Update(node, feedback_fn)
4285
4286     # this will trigger job queue propagation or cleanup if the mc
4287     # flag changed
4288     if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
4289       self.context.ReaddNode(node)
4290
4291     return result
4292
4293
4294 class LUPowercycleNode(NoHooksLU):
4295   """Powercycles a node.
4296
4297   """
4298   _OP_PARAMS = [
4299     _PNodeName,
4300     _PForce,
4301     ]
4302   REQ_BGL = False
4303
4304   def CheckArguments(self):
4305     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4306     if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4307       raise errors.OpPrereqError("The node is the master and the force"
4308                                  " parameter was not set",
4309                                  errors.ECODE_INVAL)
4310
4311   def ExpandNames(self):
4312     """Locking for PowercycleNode.
4313
4314     This is a last-resort option and shouldn't block on other
4315     jobs. Therefore, we grab no locks.
4316
4317     """
4318     self.needed_locks = {}
4319
4320   def Exec(self, feedback_fn):
4321     """Reboots a node.
4322
4323     """
4324     result = self.rpc.call_node_powercycle(self.op.node_name,
4325                                            self.cfg.GetHypervisorType())
4326     result.Raise("Failed to schedule the reboot")
4327     return result.payload
4328
4329
4330 class LUQueryClusterInfo(NoHooksLU):
4331   """Query cluster configuration.
4332
4333   """
4334   REQ_BGL = False
4335
4336   def ExpandNames(self):
4337     self.needed_locks = {}
4338
4339   def Exec(self, feedback_fn):
4340     """Return cluster config.
4341
4342     """
4343     cluster = self.cfg.GetClusterInfo()
4344     os_hvp = {}
4345
4346     # Filter just for enabled hypervisors
4347     for os_name, hv_dict in cluster.os_hvp.items():
4348       os_hvp[os_name] = {}
4349       for hv_name, hv_params in hv_dict.items():
4350         if hv_name in cluster.enabled_hypervisors:
4351           os_hvp[os_name][hv_name] = hv_params
4352
4353     # Convert ip_family to ip_version
4354     primary_ip_version = constants.IP4_VERSION
4355     if cluster.primary_ip_family == netutils.IP6Address.family:
4356       primary_ip_version = constants.IP6_VERSION
4357
4358     result = {
4359       "software_version": constants.RELEASE_VERSION,
4360       "protocol_version": constants.PROTOCOL_VERSION,
4361       "config_version": constants.CONFIG_VERSION,
4362       "os_api_version": max(constants.OS_API_VERSIONS),
4363       "export_version": constants.EXPORT_VERSION,
4364       "architecture": (platform.architecture()[0], platform.machine()),
4365       "name": cluster.cluster_name,
4366       "master": cluster.master_node,
4367       "default_hypervisor": cluster.enabled_hypervisors[0],
4368       "enabled_hypervisors": cluster.enabled_hypervisors,
4369       "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4370                         for hypervisor_name in cluster.enabled_hypervisors]),
4371       "os_hvp": os_hvp,
4372       "beparams": cluster.beparams,
4373       "osparams": cluster.osparams,
4374       "nicparams": cluster.nicparams,
4375       "candidate_pool_size": cluster.candidate_pool_size,
4376       "master_netdev": cluster.master_netdev,
4377       "volume_group_name": cluster.volume_group_name,
4378       "drbd_usermode_helper": cluster.drbd_usermode_helper,
4379       "file_storage_dir": cluster.file_storage_dir,
4380       "maintain_node_health": cluster.maintain_node_health,
4381       "ctime": cluster.ctime,
4382       "mtime": cluster.mtime,
4383       "uuid": cluster.uuid,
4384       "tags": list(cluster.GetTags()),
4385       "uid_pool": cluster.uid_pool,
4386       "default_iallocator": cluster.default_iallocator,
4387       "reserved_lvs": cluster.reserved_lvs,
4388       "primary_ip_version": primary_ip_version,
4389       "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
4390       }
4391
4392     return result
4393
4394
4395 class LUQueryConfigValues(NoHooksLU):
4396   """Return configuration values.
4397
4398   """
4399   _OP_PARAMS = [_POutputFields]
4400   REQ_BGL = False
4401   _FIELDS_DYNAMIC = utils.FieldSet()
4402   _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4403                                   "watcher_pause", "volume_group_name")
4404
4405   def CheckArguments(self):
4406     _CheckOutputFields(static=self._FIELDS_STATIC,
4407                        dynamic=self._FIELDS_DYNAMIC,
4408                        selected=self.op.output_fields)
4409
4410   def ExpandNames(self):
4411     self.needed_locks = {}
4412
4413   def Exec(self, feedback_fn):
4414     """Dump a representation of the cluster config to the standard output.
4415
4416     """
4417     values = []
4418     for field in self.op.output_fields:
4419       if field == "cluster_name":
4420         entry = self.cfg.GetClusterName()
4421       elif field == "master_node":
4422         entry = self.cfg.GetMasterNode()
4423       elif field == "drain_flag":
4424         entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4425       elif field == "watcher_pause":
4426         entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4427       elif field == "volume_group_name":
4428         entry = self.cfg.GetVGName()
4429       else:
4430         raise errors.ParameterError(field)
4431       values.append(entry)
4432     return values
4433
4434
4435 class LUActivateInstanceDisks(NoHooksLU):
4436   """Bring up an instance's disks.
4437
4438   """
4439   _OP_PARAMS = [
4440     _PInstanceName,
4441     ("ignore_size", False, ht.TBool),
4442     ]
4443   REQ_BGL = False
4444
4445   def ExpandNames(self):
4446     self._ExpandAndLockInstance()
4447     self.needed_locks[locking.LEVEL_NODE] = []
4448     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4449
4450   def DeclareLocks(self, level):
4451     if level == locking.LEVEL_NODE:
4452       self._LockInstancesNodes()
4453
4454   def CheckPrereq(self):
4455     """Check prerequisites.
4456
4457     This checks that the instance is in the cluster.
4458
4459     """
4460     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4461     assert self.instance is not None, \
4462       "Cannot retrieve locked instance %s" % self.op.instance_name
4463     _CheckNodeOnline(self, self.instance.primary_node)
4464
4465   def Exec(self, feedback_fn):
4466     """Activate the disks.
4467
4468     """
4469     disks_ok, disks_info = \
4470               _AssembleInstanceDisks(self, self.instance,
4471                                      ignore_size=self.op.ignore_size)
4472     if not disks_ok:
4473       raise errors.OpExecError("Cannot activate block devices")
4474
4475     return disks_info
4476
4477
4478 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4479                            ignore_size=False):
4480   """Prepare the block devices for an instance.
4481
4482   This sets up the block devices on all nodes.
4483
4484   @type lu: L{LogicalUnit}
4485   @param lu: the logical unit on whose behalf we execute
4486   @type instance: L{objects.Instance}
4487   @param instance: the instance for whose disks we assemble
4488   @type disks: list of L{objects.Disk} or None
4489   @param disks: which disks to assemble (or all, if None)
4490   @type ignore_secondaries: boolean
4491   @param ignore_secondaries: if true, errors on secondary nodes
4492       won't result in an error return from the function
4493   @type ignore_size: boolean
4494   @param ignore_size: if true, the current known size of the disk
4495       will not be used during the disk activation, useful for cases
4496       when the size is wrong
4497   @return: False if the operation failed, otherwise a list of
4498       (host, instance_visible_name, node_visible_name)
4499       with the mapping from node devices to instance devices
4500
4501   """
4502   device_info = []
4503   disks_ok = True
4504   iname = instance.name
4505   disks = _ExpandCheckDisks(instance, disks)
4506
4507   # With the two passes mechanism we try to reduce the window of
4508   # opportunity for the race condition of switching DRBD to primary
4509   # before handshaking occured, but we do not eliminate it
4510
4511   # The proper fix would be to wait (with some limits) until the
4512   # connection has been made and drbd transitions from WFConnection
4513   # into any other network-connected state (Connected, SyncTarget,
4514   # SyncSource, etc.)
4515
4516   # 1st pass, assemble on all nodes in secondary mode
4517   for inst_disk in disks:
4518     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4519       if ignore_size:
4520         node_disk = node_disk.Copy()
4521         node_disk.UnsetSize()
4522       lu.cfg.SetDiskID(node_disk, node)
4523       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4524       msg = result.fail_msg
4525       if msg:
4526         lu.proc.LogWarning("Could not prepare block device %s on node %s"
4527                            " (is_primary=False, pass=1): %s",
4528                            inst_disk.iv_name, node, msg)
4529         if not ignore_secondaries:
4530           disks_ok = False
4531
4532   # FIXME: race condition on drbd migration to primary
4533
4534   # 2nd pass, do only the primary node
4535   for inst_disk in disks:
4536     dev_path = None
4537
4538     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4539       if node != instance.primary_node:
4540         continue
4541       if ignore_size:
4542         node_disk = node_disk.Copy()
4543         node_disk.UnsetSize()
4544       lu.cfg.SetDiskID(node_disk, node)
4545       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4546       msg = result.fail_msg
4547       if msg:
4548         lu.proc.LogWarning("Could not prepare block device %s on node %s"
4549                            " (is_primary=True, pass=2): %s",
4550                            inst_disk.iv_name, node, msg)
4551         disks_ok = False
4552       else:
4553         dev_path = result.payload
4554
4555     device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4556
4557   # leave the disks configured for the primary node
4558   # this is a workaround that would be fixed better by
4559   # improving the logical/physical id handling
4560   for disk in disks:
4561     lu.cfg.SetDiskID(disk, instance.primary_node)
4562
4563   return disks_ok, device_info
4564
4565
4566 def _StartInstanceDisks(lu, instance, force):
4567   """Start the disks of an instance.
4568
4569   """
4570   disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4571                                            ignore_secondaries=force)
4572   if not disks_ok:
4573     _ShutdownInstanceDisks(lu, instance)
4574     if force is not None and not force:
4575       lu.proc.LogWarning("", hint="If the message above refers to a"
4576                          " secondary node,"
4577                          " you can retry the operation using '--force'.")
4578     raise errors.OpExecError("Disk consistency error")
4579
4580
4581 class LUDeactivateInstanceDisks(NoHooksLU):
4582   """Shutdown an instance's disks.
4583
4584   """
4585   _OP_PARAMS = [
4586     _PInstanceName,
4587     ]
4588   REQ_BGL = False
4589
4590   def ExpandNames(self):
4591     self._ExpandAndLockInstance()
4592     self.needed_locks[locking.LEVEL_NODE] = []
4593     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4594
4595   def DeclareLocks(self, level):
4596     if level == locking.LEVEL_NODE:
4597       self._LockInstancesNodes()
4598
4599   def CheckPrereq(self):
4600     """Check prerequisites.
4601
4602     This checks that the instance is in the cluster.
4603
4604     """
4605     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4606     assert self.instance is not None, \
4607       "Cannot retrieve locked instance %s" % self.op.instance_name
4608
4609   def Exec(self, feedback_fn):
4610     """Deactivate the disks
4611
4612     """
4613     instance = self.instance
4614     _SafeShutdownInstanceDisks(self, instance)
4615
4616
4617 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4618   """Shutdown block devices of an instance.
4619
4620   This function checks if an instance is running, before calling
4621   _ShutdownInstanceDisks.
4622
4623   """
4624   _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4625   _ShutdownInstanceDisks(lu, instance, disks=disks)
4626
4627
4628 def _ExpandCheckDisks(instance, disks):
4629   """Return the instance disks selected by the disks list
4630
4631   @type disks: list of L{objects.Disk} or None
4632   @param disks: selected disks
4633   @rtype: list of L{objects.Disk}
4634   @return: selected instance disks to act on
4635
4636   """
4637   if disks is None:
4638     return instance.disks
4639   else:
4640     if not set(disks).issubset(instance.disks):
4641       raise errors.ProgrammerError("Can only act on disks belonging to the"
4642                                    " target instance")
4643     return disks
4644
4645
4646 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4647   """Shutdown block devices of an instance.
4648
4649   This does the shutdown on all nodes of the instance.
4650
4651   If the ignore_primary is false, errors on the primary node are
4652   ignored.
4653
4654   """
4655   all_result = True
4656   disks = _ExpandCheckDisks(instance, disks)
4657
4658   for disk in disks:
4659     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4660       lu.cfg.SetDiskID(top_disk, node)
4661       result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4662       msg = result.fail_msg
4663       if msg:
4664         lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4665                       disk.iv_name, node, msg)
4666         if not ignore_primary or node != instance.primary_node:
4667           all_result = False
4668   return all_result
4669
4670
4671 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4672   """Checks if a node has enough free memory.
4673
4674   This function check if a given node has the needed amount of free
4675   memory. In case the node has less memory or we cannot get the
4676   information from the node, this function raise an OpPrereqError
4677   exception.
4678
4679   @type lu: C{LogicalUnit}
4680   @param lu: a logical unit from which we get configuration data
4681   @type node: C{str}
4682   @param node: the node to check
4683   @type reason: C{str}
4684   @param reason: string to use in the error message
4685   @type requested: C{int}
4686   @param requested: the amount of memory in MiB to check for
4687   @type hypervisor_name: C{str}
4688   @param hypervisor_name: the hypervisor to ask for memory stats
4689   @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4690       we cannot check the node
4691
4692   """
4693   nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4694   nodeinfo[node].Raise("Can't get data from node %s" % node,
4695                        prereq=True, ecode=errors.ECODE_ENVIRON)
4696   free_mem = nodeinfo[node].payload.get('memory_free', None)
4697   if not isinstance(free_mem, int):
4698     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4699                                " was '%s'" % (node, free_mem),
4700                                errors.ECODE_ENVIRON)
4701   if requested > free_mem:
4702     raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4703                                " needed %s MiB, available %s MiB" %
4704                                (node, reason, requested, free_mem),
4705                                errors.ECODE_NORES)
4706
4707
4708 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
4709   """Checks if nodes have enough free disk space in the all VGs.
4710
4711   This function check if all given nodes have the needed amount of
4712   free disk. In case any node has less disk or we cannot get the
4713   information from the node, this function raise an OpPrereqError
4714   exception.
4715
4716   @type lu: C{LogicalUnit}
4717   @param lu: a logical unit from which we get configuration data
4718   @type nodenames: C{list}
4719   @param nodenames: the list of node names to check
4720   @type req_sizes: C{dict}
4721   @param req_sizes: the hash of vg and corresponding amount of disk in
4722       MiB to check for
4723   @raise errors.OpPrereqError: if the node doesn't have enough disk,
4724       or we cannot check the node
4725
4726   """
4727   if req_sizes is not None:
4728     for vg, req_size in req_sizes.iteritems():
4729       _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
4730
4731
4732 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
4733   """Checks if nodes have enough free disk space in the specified VG.
4734
4735   This function check if all given nodes have the needed amount of
4736   free disk. In case any node has less disk or we cannot get the
4737   information from the node, this function raise an OpPrereqError
4738   exception.
4739
4740   @type lu: C{LogicalUnit}
4741   @param lu: a logical unit from which we get configuration data
4742   @type nodenames: C{list}
4743   @param nodenames: the list of node names to check
4744   @type vg: C{str}
4745   @param vg: the volume group to check
4746   @type requested: C{int}
4747   @param requested: the amount of disk in MiB to check for
4748   @raise errors.OpPrereqError: if the node doesn't have enough disk,
4749       or we cannot check the node
4750
4751   """
4752   nodeinfo = lu.rpc.call_node_info(nodenames, vg,
4753                                    lu.cfg.GetHypervisorType())
4754   for node in nodenames:
4755     info = nodeinfo[node]
4756     info.Raise("Cannot get current information from node %s" % node,
4757                prereq=True, ecode=errors.ECODE_ENVIRON)
4758     vg_free = info.payload.get("vg_free", None)
4759     if not isinstance(vg_free, int):
4760       raise errors.OpPrereqError("Can't compute free disk space on node"
4761                                  " %s for vg %s, result was '%s'" %
4762                                  (node, vg, vg_free), errors.ECODE_ENVIRON)
4763     if requested > vg_free:
4764       raise errors.OpPrereqError("Not enough disk space on target node %s"
4765                                  " vg %s: required %d MiB, available %d MiB" %
4766                                  (node, vg, requested, vg_free),
4767                                  errors.ECODE_NORES)
4768
4769
4770 class LUStartupInstance(LogicalUnit):
4771   """Starts an instance.
4772
4773   """
4774   HPATH = "instance-start"
4775   HTYPE = constants.HTYPE_INSTANCE
4776   _OP_PARAMS = [
4777     _PInstanceName,
4778     _PForce,
4779     _PIgnoreOfflineNodes,
4780     ("hvparams", ht.EmptyDict, ht.TDict),
4781     ("beparams", ht.EmptyDict, ht.TDict),
4782     ]
4783   REQ_BGL = False
4784
4785   def CheckArguments(self):
4786     # extra beparams
4787     if self.op.beparams:
4788       # fill the beparams dict
4789       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4790
4791   def ExpandNames(self):
4792     self._ExpandAndLockInstance()
4793
4794   def BuildHooksEnv(self):
4795     """Build hooks env.
4796
4797     This runs on master, primary and secondary nodes of the instance.
4798
4799     """
4800     env = {
4801       "FORCE": self.op.force,
4802       }
4803     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4804     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4805     return env, nl, nl
4806
4807   def CheckPrereq(self):
4808     """Check prerequisites.
4809
4810     This checks that the instance is in the cluster.
4811
4812     """
4813     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4814     assert self.instance is not None, \
4815       "Cannot retrieve locked instance %s" % self.op.instance_name
4816
4817     # extra hvparams
4818     if self.op.hvparams:
4819       # check hypervisor parameter syntax (locally)
4820       cluster = self.cfg.GetClusterInfo()
4821       utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4822       filled_hvp = cluster.FillHV(instance)
4823       filled_hvp.update(self.op.hvparams)
4824       hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4825       hv_type.CheckParameterSyntax(filled_hvp)
4826       _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4827
4828     self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
4829
4830     if self.primary_offline and self.op.ignore_offline_nodes:
4831       self.proc.LogWarning("Ignoring offline primary node")
4832
4833       if self.op.hvparams or self.op.beparams:
4834         self.proc.LogWarning("Overridden parameters are ignored")
4835     else:
4836       _CheckNodeOnline(self, instance.primary_node)
4837
4838       bep = self.cfg.GetClusterInfo().FillBE(instance)
4839
4840       # check bridges existence
4841       _CheckInstanceBridgesExist(self, instance)
4842
4843       remote_info = self.rpc.call_instance_info(instance.primary_node,
4844                                                 instance.name,
4845                                                 instance.hypervisor)
4846       remote_info.Raise("Error checking node %s" % instance.primary_node,
4847                         prereq=True, ecode=errors.ECODE_ENVIRON)
4848       if not remote_info.payload: # not running already
4849         _CheckNodeFreeMemory(self, instance.primary_node,
4850                              "starting instance %s" % instance.name,
4851                              bep[constants.BE_MEMORY], instance.hypervisor)
4852
4853   def Exec(self, feedback_fn):
4854     """Start the instance.
4855
4856     """
4857     instance = self.instance
4858     force = self.op.force
4859
4860     self.cfg.MarkInstanceUp(instance.name)
4861
4862     if self.primary_offline:
4863       assert self.op.ignore_offline_nodes
4864       self.proc.LogInfo("Primary node offline, marked instance as started")
4865     else:
4866       node_current = instance.primary_node
4867
4868       _StartInstanceDisks(self, instance, force)
4869
4870       result = self.rpc.call_instance_start(node_current, instance,
4871                                             self.op.hvparams, self.op.beparams)
4872       msg = result.fail_msg
4873       if msg:
4874         _ShutdownInstanceDisks(self, instance)
4875         raise errors.OpExecError("Could not start instance: %s" % msg)
4876
4877
4878 class LURebootInstance(LogicalUnit):
4879   """Reboot an instance.
4880
4881   """
4882   HPATH = "instance-reboot"
4883   HTYPE = constants.HTYPE_INSTANCE
4884   _OP_PARAMS = [
4885     _PInstanceName,
4886     ("ignore_secondaries", False, ht.TBool),
4887     ("reboot_type", ht.NoDefault, ht.TElemOf(constants.REBOOT_TYPES)),
4888     _PShutdownTimeout,
4889     ]
4890   REQ_BGL = False
4891
4892   def ExpandNames(self):
4893     self._ExpandAndLockInstance()
4894
4895   def BuildHooksEnv(self):
4896     """Build hooks env.
4897
4898     This runs on master, primary and secondary nodes of the instance.
4899
4900     """
4901     env = {
4902       "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4903       "REBOOT_TYPE": self.op.reboot_type,
4904       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
4905       }
4906     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4907     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4908     return env, nl, nl
4909
4910   def CheckPrereq(self):
4911     """Check prerequisites.
4912
4913     This checks that the instance is in the cluster.
4914
4915     """
4916     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4917     assert self.instance is not None, \
4918       "Cannot retrieve locked instance %s" % self.op.instance_name
4919
4920     _CheckNodeOnline(self, instance.primary_node)
4921
4922     # check bridges existence
4923     _CheckInstanceBridgesExist(self, instance)
4924
4925   def Exec(self, feedback_fn):
4926     """Reboot the instance.
4927
4928     """
4929     instance = self.instance
4930     ignore_secondaries = self.op.ignore_secondaries
4931     reboot_type = self.op.reboot_type
4932
4933     node_current = instance.primary_node
4934
4935     if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4936                        constants.INSTANCE_REBOOT_HARD]:
4937       for disk in instance.disks:
4938         self.cfg.SetDiskID(disk, node_current)
4939       result = self.rpc.call_instance_reboot(node_current, instance,
4940                                              reboot_type,
4941                                              self.op.shutdown_timeout)
4942       result.Raise("Could not reboot instance")
4943     else:
4944       result = self.rpc.call_instance_shutdown(node_current, instance,
4945                                                self.op.shutdown_timeout)
4946       result.Raise("Could not shutdown instance for full reboot")
4947       _ShutdownInstanceDisks(self, instance)
4948       _StartInstanceDisks(self, instance, ignore_secondaries)
4949       result = self.rpc.call_instance_start(node_current, instance, None, None)
4950       msg = result.fail_msg
4951       if msg:
4952         _ShutdownInstanceDisks(self, instance)
4953         raise errors.OpExecError("Could not start instance for"
4954                                  " full reboot: %s" % msg)
4955
4956     self.cfg.MarkInstanceUp(instance.name)
4957
4958
4959 class LUShutdownInstance(LogicalUnit):
4960   """Shutdown an instance.
4961
4962   """
4963   HPATH = "instance-stop"
4964   HTYPE = constants.HTYPE_INSTANCE
4965   _OP_PARAMS = [
4966     _PInstanceName,
4967     _PIgnoreOfflineNodes,
4968     ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, ht.TPositiveInt),
4969     ]
4970   REQ_BGL = False
4971
4972   def ExpandNames(self):
4973     self._ExpandAndLockInstance()
4974
4975   def BuildHooksEnv(self):
4976     """Build hooks env.
4977
4978     This runs on master, primary and secondary nodes of the instance.
4979
4980     """
4981     env = _BuildInstanceHookEnvByObject(self, self.instance)
4982     env["TIMEOUT"] = self.op.timeout
4983     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4984     return env, nl, nl
4985
4986   def CheckPrereq(self):
4987     """Check prerequisites.
4988
4989     This checks that the instance is in the cluster.
4990
4991     """
4992     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4993     assert self.instance is not None, \
4994       "Cannot retrieve locked instance %s" % self.op.instance_name
4995
4996     self.primary_offline = \
4997       self.cfg.GetNodeInfo(self.instance.primary_node).offline
4998
4999     if self.primary_offline and self.op.ignore_offline_nodes:
5000       self.proc.LogWarning("Ignoring offline primary node")
5001     else:
5002       _CheckNodeOnline(self, self.instance.primary_node)
5003
5004   def Exec(self, feedback_fn):
5005     """Shutdown the instance.
5006
5007     """
5008     instance = self.instance
5009     node_current = instance.primary_node
5010     timeout = self.op.timeout
5011
5012     self.cfg.MarkInstanceDown(instance.name)
5013
5014     if self.primary_offline:
5015       assert self.op.ignore_offline_nodes
5016       self.proc.LogInfo("Primary node offline, marked instance as stopped")
5017     else:
5018       result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5019       msg = result.fail_msg
5020       if msg:
5021         self.proc.LogWarning("Could not shutdown instance: %s" % msg)
5022
5023       _ShutdownInstanceDisks(self, instance)
5024
5025
5026 class LUReinstallInstance(LogicalUnit):
5027   """Reinstall an instance.
5028
5029   """
5030   HPATH = "instance-reinstall"
5031   HTYPE = constants.HTYPE_INSTANCE
5032   _OP_PARAMS = [
5033     _PInstanceName,
5034     ("os_type", None, ht.TMaybeString),
5035     ("force_variant", False, ht.TBool),
5036     ("osparams", None, ht.TOr(ht.TDict, ht.TNone)),
5037     ]
5038   REQ_BGL = False
5039
5040   def ExpandNames(self):
5041     self._ExpandAndLockInstance()
5042
5043   def BuildHooksEnv(self):
5044     """Build hooks env.
5045
5046     This runs on master, primary and secondary nodes of the instance.
5047
5048     """
5049     env = _BuildInstanceHookEnvByObject(self, self.instance)
5050     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5051     return env, nl, nl
5052
5053   def CheckPrereq(self):
5054     """Check prerequisites.
5055
5056     This checks that the instance is in the cluster and is not running.
5057
5058     """
5059     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5060     assert instance is not None, \
5061       "Cannot retrieve locked instance %s" % self.op.instance_name
5062     _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5063                      " offline, cannot reinstall")
5064     for node in instance.secondary_nodes:
5065       _CheckNodeOnline(self, node, "Instance secondary node offline,"
5066                        " cannot reinstall")
5067
5068     if instance.disk_template == constants.DT_DISKLESS:
5069       raise errors.OpPrereqError("Instance '%s' has no disks" %
5070                                  self.op.instance_name,
5071                                  errors.ECODE_INVAL)
5072     _CheckInstanceDown(self, instance, "cannot reinstall")
5073
5074     if self.op.os_type is not None:
5075       # OS verification
5076       pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5077       _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5078       instance_os = self.op.os_type
5079     else:
5080       instance_os = instance.os
5081
5082     nodelist = list(instance.all_nodes)
5083
5084     if self.op.osparams:
5085       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5086       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5087       self.os_inst = i_osdict # the new dict (without defaults)
5088     else:
5089       self.os_inst = None
5090
5091     self.instance = instance
5092
5093   def Exec(self, feedback_fn):
5094     """Reinstall the instance.
5095
5096     """
5097     inst = self.instance
5098
5099     if self.op.os_type is not None:
5100       feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5101       inst.os = self.op.os_type
5102       # Write to configuration
5103       self.cfg.Update(inst, feedback_fn)
5104
5105     _StartInstanceDisks(self, inst, None)
5106     try:
5107       feedback_fn("Running the instance OS create scripts...")
5108       # FIXME: pass debug option from opcode to backend
5109       result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5110                                              self.op.debug_level,
5111                                              osparams=self.os_inst)
5112       result.Raise("Could not install OS for instance %s on node %s" %
5113                    (inst.name, inst.primary_node))
5114     finally:
5115       _ShutdownInstanceDisks(self, inst)
5116
5117
5118 class LURecreateInstanceDisks(LogicalUnit):
5119   """Recreate an instance's missing disks.
5120
5121   """
5122   HPATH = "instance-recreate-disks"
5123   HTYPE = constants.HTYPE_INSTANCE
5124   _OP_PARAMS = [
5125     _PInstanceName,
5126     ("disks", ht.EmptyList, ht.TListOf(ht.TPositiveInt)),
5127     ]
5128   REQ_BGL = False
5129
5130   def ExpandNames(self):
5131     self._ExpandAndLockInstance()
5132
5133   def BuildHooksEnv(self):
5134     """Build hooks env.
5135
5136     This runs on master, primary and secondary nodes of the instance.
5137
5138     """
5139     env = _BuildInstanceHookEnvByObject(self, self.instance)
5140     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5141     return env, nl, nl
5142
5143   def CheckPrereq(self):
5144     """Check prerequisites.
5145
5146     This checks that the instance is in the cluster and is not running.
5147
5148     """
5149     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5150     assert instance is not None, \
5151       "Cannot retrieve locked instance %s" % self.op.instance_name
5152     _CheckNodeOnline(self, instance.primary_node)
5153
5154     if instance.disk_template == constants.DT_DISKLESS:
5155       raise errors.OpPrereqError("Instance '%s' has no disks" %
5156                                  self.op.instance_name, errors.ECODE_INVAL)
5157     _CheckInstanceDown(self, instance, "cannot recreate disks")
5158
5159     if not self.op.disks:
5160       self.op.disks = range(len(instance.disks))
5161     else:
5162       for idx in self.op.disks:
5163         if idx >= len(instance.disks):
5164           raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
5165                                      errors.ECODE_INVAL)
5166
5167     self.instance = instance
5168
5169   def Exec(self, feedback_fn):
5170     """Recreate the disks.
5171
5172     """
5173     to_skip = []
5174     for idx, _ in enumerate(self.instance.disks):
5175       if idx not in self.op.disks: # disk idx has not been passed in
5176         to_skip.append(idx)
5177         continue
5178
5179     _CreateDisks(self, self.instance, to_skip=to_skip)
5180
5181
5182 class LURenameInstance(LogicalUnit):
5183   """Rename an instance.
5184
5185   """
5186   HPATH = "instance-rename"
5187   HTYPE = constants.HTYPE_INSTANCE
5188   _OP_PARAMS = [
5189     _PInstanceName,
5190     ("new_name", ht.NoDefault, ht.TNonEmptyString),
5191     ("ip_check", False, ht.TBool),
5192     ("name_check", True, ht.TBool),
5193     ]
5194
5195   def CheckArguments(self):
5196     """Check arguments.
5197
5198     """
5199     if self.op.ip_check and not self.op.name_check:
5200       # TODO: make the ip check more flexible and not depend on the name check
5201       raise errors.OpPrereqError("Cannot do ip check without a name check",
5202                                  errors.ECODE_INVAL)
5203
5204   def BuildHooksEnv(self):
5205     """Build hooks env.
5206
5207     This runs on master, primary and secondary nodes of the instance.
5208
5209     """
5210     env = _BuildInstanceHookEnvByObject(self, self.instance)
5211     env["INSTANCE_NEW_NAME"] = self.op.new_name
5212     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5213     return env, nl, nl
5214
5215   def CheckPrereq(self):
5216     """Check prerequisites.
5217
5218     This checks that the instance is in the cluster and is not running.
5219
5220     """
5221     self.op.instance_name = _ExpandInstanceName(self.cfg,
5222                                                 self.op.instance_name)
5223     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5224     assert instance is not None
5225     _CheckNodeOnline(self, instance.primary_node)
5226     _CheckInstanceDown(self, instance, "cannot rename")
5227     self.instance = instance
5228
5229     new_name = self.op.new_name
5230     if self.op.name_check:
5231       hostname = netutils.GetHostname(name=new_name)
5232       new_name = self.op.new_name = hostname.name
5233       if (self.op.ip_check and
5234           netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
5235         raise errors.OpPrereqError("IP %s of instance %s already in use" %
5236                                    (hostname.ip, new_name),
5237                                    errors.ECODE_NOTUNIQUE)
5238
5239     instance_list = self.cfg.GetInstanceList()
5240     if new_name in instance_list:
5241       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5242                                  new_name, errors.ECODE_EXISTS)
5243
5244   def Exec(self, feedback_fn):
5245     """Reinstall the instance.
5246
5247     """
5248     inst = self.instance
5249     old_name = inst.name
5250
5251     if inst.disk_template == constants.DT_FILE:
5252       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5253
5254     self.cfg.RenameInstance(inst.name, self.op.new_name)
5255     # Change the instance lock. This is definitely safe while we hold the BGL
5256     self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5257     self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5258
5259     # re-read the instance from the configuration after rename
5260     inst = self.cfg.GetInstanceInfo(self.op.new_name)
5261
5262     if inst.disk_template == constants.DT_FILE:
5263       new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5264       result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5265                                                      old_file_storage_dir,
5266                                                      new_file_storage_dir)
5267       result.Raise("Could not rename on node %s directory '%s' to '%s'"
5268                    " (but the instance has been renamed in Ganeti)" %
5269                    (inst.primary_node, old_file_storage_dir,
5270                     new_file_storage_dir))
5271
5272     _StartInstanceDisks(self, inst, None)
5273     try:
5274       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5275                                                  old_name, self.op.debug_level)
5276       msg = result.fail_msg
5277       if msg:
5278         msg = ("Could not run OS rename script for instance %s on node %s"
5279                " (but the instance has been renamed in Ganeti): %s" %
5280                (inst.name, inst.primary_node, msg))
5281         self.proc.LogWarning(msg)
5282     finally:
5283       _ShutdownInstanceDisks(self, inst)
5284
5285     return inst.name
5286
5287
5288 class LURemoveInstance(LogicalUnit):
5289   """Remove an instance.
5290
5291   """
5292   HPATH = "instance-remove"
5293   HTYPE = constants.HTYPE_INSTANCE
5294   _OP_PARAMS = [
5295     _PInstanceName,
5296     ("ignore_failures", False, ht.TBool),
5297     _PShutdownTimeout,
5298     ]
5299   REQ_BGL = False
5300
5301   def ExpandNames(self):
5302     self._ExpandAndLockInstance()
5303     self.needed_locks[locking.LEVEL_NODE] = []
5304     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5305
5306   def DeclareLocks(self, level):
5307     if level == locking.LEVEL_NODE:
5308       self._LockInstancesNodes()
5309
5310   def BuildHooksEnv(self):
5311     """Build hooks env.
5312
5313     This runs on master, primary and secondary nodes of the instance.
5314
5315     """
5316     env = _BuildInstanceHookEnvByObject(self, self.instance)
5317     env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5318     nl = [self.cfg.GetMasterNode()]
5319     nl_post = list(self.instance.all_nodes) + nl
5320     return env, nl, nl_post
5321
5322   def CheckPrereq(self):
5323     """Check prerequisites.
5324
5325     This checks that the instance is in the cluster.
5326
5327     """
5328     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5329     assert self.instance is not None, \
5330       "Cannot retrieve locked instance %s" % self.op.instance_name
5331
5332   def Exec(self, feedback_fn):
5333     """Remove the instance.
5334
5335     """
5336     instance = self.instance
5337     logging.info("Shutting down instance %s on node %s",
5338                  instance.name, instance.primary_node)
5339
5340     result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5341                                              self.op.shutdown_timeout)
5342     msg = result.fail_msg
5343     if msg:
5344       if self.op.ignore_failures:
5345         feedback_fn("Warning: can't shutdown instance: %s" % msg)
5346       else:
5347         raise errors.OpExecError("Could not shutdown instance %s on"
5348                                  " node %s: %s" %
5349                                  (instance.name, instance.primary_node, msg))
5350
5351     _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5352
5353
5354 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5355   """Utility function to remove an instance.
5356
5357   """
5358   logging.info("Removing block devices for instance %s", instance.name)
5359
5360   if not _RemoveDisks(lu, instance):
5361     if not ignore_failures:
5362       raise errors.OpExecError("Can't remove instance's disks")
5363     feedback_fn("Warning: can't remove instance's disks")
5364
5365   logging.info("Removing instance %s out of cluster config", instance.name)
5366
5367   lu.cfg.RemoveInstance(instance.name)
5368
5369   assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5370     "Instance lock removal conflict"
5371
5372   # Remove lock for the instance
5373   lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5374
5375
5376 class LUQueryInstances(NoHooksLU):
5377   """Logical unit for querying instances.
5378
5379   """
5380   # pylint: disable-msg=W0142
5381   _OP_PARAMS = [
5382     _POutputFields,
5383     ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
5384     ("use_locking", False, ht.TBool),
5385     ]
5386   REQ_BGL = False
5387   _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
5388                     "serial_no", "ctime", "mtime", "uuid"]
5389   _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
5390                                     "admin_state",
5391                                     "disk_template", "ip", "mac", "bridge",
5392                                     "nic_mode", "nic_link",
5393                                     "sda_size", "sdb_size", "vcpus", "tags",
5394                                     "network_port", "beparams",
5395                                     r"(disk)\.(size)/([0-9]+)",
5396                                     r"(disk)\.(sizes)", "disk_usage",
5397                                     r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
5398                                     r"(nic)\.(bridge)/([0-9]+)",
5399                                     r"(nic)\.(macs|ips|modes|links|bridges)",
5400                                     r"(disk|nic)\.(count)",
5401                                     "hvparams", "custom_hvparams",
5402                                     "custom_beparams", "custom_nicparams",
5403                                     ] + _SIMPLE_FIELDS +
5404                                   ["hv/%s" % name
5405                                    for name in constants.HVS_PARAMETERS
5406                                    if name not in constants.HVC_GLOBALS] +
5407                                   ["be/%s" % name
5408                                    for name in constants.BES_PARAMETERS])
5409   _FIELDS_DYNAMIC = utils.FieldSet("oper_state",
5410                                    "oper_ram",
5411                                    "oper_vcpus",
5412                                    "status")
5413
5414
5415   def CheckArguments(self):
5416     _CheckOutputFields(static=self._FIELDS_STATIC,
5417                        dynamic=self._FIELDS_DYNAMIC,
5418                        selected=self.op.output_fields)
5419
5420   def ExpandNames(self):
5421     self.needed_locks = {}
5422     self.share_locks[locking.LEVEL_INSTANCE] = 1
5423     self.share_locks[locking.LEVEL_NODE] = 1
5424
5425     if self.op.names:
5426       self.wanted = _GetWantedInstances(self, self.op.names)
5427     else:
5428       self.wanted = locking.ALL_SET
5429
5430     self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
5431     self.do_locking = self.do_node_query and self.op.use_locking
5432     if self.do_locking:
5433       self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5434       self.needed_locks[locking.LEVEL_NODE] = []
5435       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5436
5437   def DeclareLocks(self, level):
5438     if level == locking.LEVEL_NODE and self.do_locking:
5439       self._LockInstancesNodes()
5440
5441   def Exec(self, feedback_fn):
5442     """Computes the list of nodes and their attributes.
5443
5444     """
5445     # pylint: disable-msg=R0912
5446     # way too many branches here
5447     all_info = self.cfg.GetAllInstancesInfo()
5448     if self.wanted == locking.ALL_SET:
5449       # caller didn't specify instance names, so ordering is not important
5450       if self.do_locking:
5451         instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
5452       else:
5453         instance_names = all_info.keys()
5454       instance_names = utils.NiceSort(instance_names)
5455     else:
5456       # caller did specify names, so we must keep the ordering
5457       if self.do_locking:
5458         tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
5459       else:
5460         tgt_set = all_info.keys()
5461       missing = set(self.wanted).difference(tgt_set)
5462       if missing:
5463         raise errors.OpExecError("Some instances were removed before"
5464                                  " retrieving their data: %s" % missing)
5465       instance_names = self.wanted
5466
5467     instance_list = [all_info[iname] for iname in instance_names]
5468
5469     # begin data gathering
5470
5471     nodes = frozenset([inst.primary_node for inst in instance_list])
5472     hv_list = list(set([inst.hypervisor for inst in instance_list]))
5473
5474     bad_nodes = []
5475     off_nodes = []
5476     if self.do_node_query:
5477       live_data = {}
5478       node_data = self.rpc.call_all_instances_info(nodes, hv_list)
5479       for name in nodes:
5480         result = node_data[name]
5481         if result.offline:
5482           # offline nodes will be in both lists
5483           off_nodes.append(name)
5484         if result.fail_msg:
5485           bad_nodes.append(name)
5486         else:
5487           if result.payload:
5488             live_data.update(result.payload)
5489           # else no instance is alive
5490     else:
5491       live_data = dict([(name, {}) for name in instance_names])
5492
5493     # end data gathering
5494
5495     HVPREFIX = "hv/"
5496     BEPREFIX = "be/"
5497     output = []
5498     cluster = self.cfg.GetClusterInfo()
5499     for instance in instance_list:
5500       iout = []
5501       i_hv = cluster.FillHV(instance, skip_globals=True)
5502       i_be = cluster.FillBE(instance)
5503       i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
5504       for field in self.op.output_fields:
5505         st_match = self._FIELDS_STATIC.Matches(field)
5506         if field in self._SIMPLE_FIELDS:
5507           val = getattr(instance, field)
5508         elif field == "pnode":
5509           val = instance.primary_node
5510         elif field == "snodes":
5511           val = list(instance.secondary_nodes)
5512         elif field == "admin_state":
5513           val = instance.admin_up
5514         elif field == "oper_state":
5515           if instance.primary_node in bad_nodes:
5516             val = None
5517           else:
5518             val = bool(live_data.get(instance.name))
5519         elif field == "status":
5520           if instance.primary_node in off_nodes:
5521             val = "ERROR_nodeoffline"
5522           elif instance.primary_node in bad_nodes:
5523             val = "ERROR_nodedown"
5524           else:
5525             running = bool(live_data.get(instance.name))
5526             if running:
5527               if instance.admin_up:
5528                 val = "running"
5529               else:
5530                 val = "ERROR_up"
5531             else:
5532               if instance.admin_up:
5533                 val = "ERROR_down"
5534               else:
5535                 val = "ADMIN_down"
5536         elif field == "oper_ram":
5537           if instance.primary_node in bad_nodes:
5538             val = None
5539           elif instance.name in live_data:
5540             val = live_data[instance.name].get("memory", "?")
5541           else:
5542             val = "-"
5543         elif field == "oper_vcpus":
5544           if instance.primary_node in bad_nodes:
5545             val = None
5546           elif instance.name in live_data:
5547             val = live_data[instance.name].get("vcpus", "?")
5548           else:
5549             val = "-"
5550         elif field == "vcpus":
5551           val = i_be[constants.BE_VCPUS]
5552         elif field == "disk_template":
5553           val = instance.disk_template
5554         elif field == "ip":
5555           if instance.nics:
5556             val = instance.nics[0].ip
5557           else:
5558             val = None
5559         elif field == "nic_mode":
5560           if instance.nics:
5561             val = i_nicp[0][constants.NIC_MODE]
5562           else:
5563             val = None
5564         elif field == "nic_link":
5565           if instance.nics:
5566             val = i_nicp[0][constants.NIC_LINK]
5567           else:
5568             val = None
5569         elif field == "bridge":
5570           if (instance.nics and
5571               i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
5572             val = i_nicp[0][constants.NIC_LINK]
5573           else:
5574             val = None
5575         elif field == "mac":
5576           if instance.nics:
5577             val = instance.nics[0].mac
5578           else:
5579             val = None
5580         elif field == "custom_nicparams":
5581           val = [nic.nicparams for nic in instance.nics]
5582         elif field == "sda_size" or field == "sdb_size":
5583           idx = ord(field[2]) - ord('a')
5584           try:
5585             val = instance.FindDisk(idx).size
5586           except errors.OpPrereqError:
5587             val = None
5588         elif field == "disk_usage": # total disk usage per node
5589           disk_sizes = [{'size': disk.size} for disk in instance.disks]
5590           val = _ComputeDiskSize(instance.disk_template, disk_sizes)
5591         elif field == "tags":
5592           val = list(instance.GetTags())
5593         elif field == "custom_hvparams":
5594           val = instance.hvparams # not filled!
5595         elif field == "hvparams":
5596           val = i_hv
5597         elif (field.startswith(HVPREFIX) and
5598               field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
5599               field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
5600           val = i_hv.get(field[len(HVPREFIX):], None)
5601         elif field == "custom_beparams":
5602           val = instance.beparams
5603         elif field == "beparams":
5604           val = i_be
5605         elif (field.startswith(BEPREFIX) and
5606               field[len(BEPREFIX):] in constants.BES_PARAMETERS):
5607           val = i_be.get(field[len(BEPREFIX):], None)
5608         elif st_match and st_match.groups():
5609           # matches a variable list
5610           st_groups = st_match.groups()
5611           if st_groups and st_groups[0] == "disk":
5612             if st_groups[1] == "count":
5613               val = len(instance.disks)
5614             elif st_groups[1] == "sizes":
5615               val = [disk.size for disk in instance.disks]
5616             elif st_groups[1] == "size":
5617               try:
5618                 val = instance.FindDisk(st_groups[2]).size
5619               except errors.OpPrereqError:
5620                 val = None
5621             else:
5622               assert False, "Unhandled disk parameter"
5623           elif st_groups[0] == "nic":
5624             if st_groups[1] == "count":
5625               val = len(instance.nics)
5626             elif st_groups[1] == "macs":
5627               val = [nic.mac for nic in instance.nics]
5628             elif st_groups[1] == "ips":
5629               val = [nic.ip for nic in instance.nics]
5630             elif st_groups[1] == "modes":
5631               val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
5632             elif st_groups[1] == "links":
5633               val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
5634             elif st_groups[1] == "bridges":
5635               val = []
5636               for nicp in i_nicp:
5637                 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
5638                   val.append(nicp[constants.NIC_LINK])
5639                 else:
5640                   val.append(None)
5641             else:
5642               # index-based item
5643               nic_idx = int(st_groups[2])
5644               if nic_idx >= len(instance.nics):
5645                 val = None
5646               else:
5647                 if st_groups[1] == "mac":
5648                   val = instance.nics[nic_idx].mac
5649                 elif st_groups[1] == "ip":
5650                   val = instance.nics[nic_idx].ip
5651                 elif st_groups[1] == "mode":
5652                   val = i_nicp[nic_idx][constants.NIC_MODE]
5653                 elif st_groups[1] == "link":
5654                   val = i_nicp[nic_idx][constants.NIC_LINK]
5655                 elif st_groups[1] == "bridge":
5656                   nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
5657                   if nic_mode == constants.NIC_MODE_BRIDGED:
5658                     val = i_nicp[nic_idx][constants.NIC_LINK]
5659                   else:
5660                     val = None
5661                 else:
5662                   assert False, "Unhandled NIC parameter"
5663           else:
5664             assert False, ("Declared but unhandled variable parameter '%s'" %
5665                            field)
5666         else:
5667           assert False, "Declared but unhandled parameter '%s'" % field
5668         iout.append(val)
5669       output.append(iout)
5670
5671     return output
5672
5673
5674 class LUFailoverInstance(LogicalUnit):
5675   """Failover an instance.
5676
5677   """
5678   HPATH = "instance-failover"
5679   HTYPE = constants.HTYPE_INSTANCE
5680   _OP_PARAMS = [
5681     _PInstanceName,
5682     ("ignore_consistency", False, ht.TBool),
5683     _PShutdownTimeout,
5684     ]
5685   REQ_BGL = False
5686
5687   def ExpandNames(self):
5688     self._ExpandAndLockInstance()
5689     self.needed_locks[locking.LEVEL_NODE] = []
5690     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5691
5692   def DeclareLocks(self, level):
5693     if level == locking.LEVEL_NODE:
5694       self._LockInstancesNodes()
5695
5696   def BuildHooksEnv(self):
5697     """Build hooks env.
5698
5699     This runs on master, primary and secondary nodes of the instance.
5700
5701     """
5702     instance = self.instance
5703     source_node = instance.primary_node
5704     target_node = instance.secondary_nodes[0]
5705     env = {
5706       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5707       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5708       "OLD_PRIMARY": source_node,
5709       "OLD_SECONDARY": target_node,
5710       "NEW_PRIMARY": target_node,
5711       "NEW_SECONDARY": source_node,
5712       }
5713     env.update(_BuildInstanceHookEnvByObject(self, instance))
5714     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5715     nl_post = list(nl)
5716     nl_post.append(source_node)
5717     return env, nl, nl_post
5718
5719   def CheckPrereq(self):
5720     """Check prerequisites.
5721
5722     This checks that the instance is in the cluster.
5723
5724     """
5725     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5726     assert self.instance is not None, \
5727       "Cannot retrieve locked instance %s" % self.op.instance_name
5728
5729     bep = self.cfg.GetClusterInfo().FillBE(instance)
5730     if instance.disk_template not in constants.DTS_NET_MIRROR:
5731       raise errors.OpPrereqError("Instance's disk layout is not"
5732                                  " network mirrored, cannot failover.",
5733                                  errors.ECODE_STATE)
5734
5735     secondary_nodes = instance.secondary_nodes
5736     if not secondary_nodes:
5737       raise errors.ProgrammerError("no secondary node but using "
5738                                    "a mirrored disk template")
5739
5740     target_node = secondary_nodes[0]
5741     _CheckNodeOnline(self, target_node)
5742     _CheckNodeNotDrained(self, target_node)
5743     if instance.admin_up:
5744       # check memory requirements on the secondary node
5745       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5746                            instance.name, bep[constants.BE_MEMORY],
5747                            instance.hypervisor)
5748     else:
5749       self.LogInfo("Not checking memory on the secondary node as"
5750                    " instance will not be started")
5751
5752     # check bridge existance
5753     _CheckInstanceBridgesExist(self, instance, node=target_node)
5754
5755   def Exec(self, feedback_fn):
5756     """Failover an instance.
5757
5758     The failover is done by shutting it down on its present node and
5759     starting it on the secondary.
5760
5761     """
5762     instance = self.instance
5763     primary_node = self.cfg.GetNodeInfo(instance.primary_node)
5764
5765     source_node = instance.primary_node
5766     target_node = instance.secondary_nodes[0]
5767
5768     if instance.admin_up:
5769       feedback_fn("* checking disk consistency between source and target")
5770       for dev in instance.disks:
5771         # for drbd, these are drbd over lvm
5772         if not _CheckDiskConsistency(self, dev, target_node, False):
5773           if not self.op.ignore_consistency:
5774             raise errors.OpExecError("Disk %s is degraded on target node,"
5775                                      " aborting failover." % dev.iv_name)
5776     else:
5777       feedback_fn("* not checking disk consistency as instance is not running")
5778
5779     feedback_fn("* shutting down instance on source node")
5780     logging.info("Shutting down instance %s on node %s",
5781                  instance.name, source_node)
5782
5783     result = self.rpc.call_instance_shutdown(source_node, instance,
5784                                              self.op.shutdown_timeout)
5785     msg = result.fail_msg
5786     if msg:
5787       if self.op.ignore_consistency or primary_node.offline:
5788         self.proc.LogWarning("Could not shutdown instance %s on node %s."
5789                              " Proceeding anyway. Please make sure node"
5790                              " %s is down. Error details: %s",
5791                              instance.name, source_node, source_node, msg)
5792       else:
5793         raise errors.OpExecError("Could not shutdown instance %s on"
5794                                  " node %s: %s" %
5795                                  (instance.name, source_node, msg))
5796
5797     feedback_fn("* deactivating the instance's disks on source node")
5798     if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5799       raise errors.OpExecError("Can't shut down the instance's disks.")
5800
5801     instance.primary_node = target_node
5802     # distribute new instance config to the other nodes
5803     self.cfg.Update(instance, feedback_fn)
5804
5805     # Only start the instance if it's marked as up
5806     if instance.admin_up:
5807       feedback_fn("* activating the instance's disks on target node")
5808       logging.info("Starting instance %s on node %s",
5809                    instance.name, target_node)
5810
5811       disks_ok, _ = _AssembleInstanceDisks(self, instance,
5812                                            ignore_secondaries=True)
5813       if not disks_ok:
5814         _ShutdownInstanceDisks(self, instance)
5815         raise errors.OpExecError("Can't activate the instance's disks")
5816
5817       feedback_fn("* starting the instance on the target node")
5818       result = self.rpc.call_instance_start(target_node, instance, None, None)
5819       msg = result.fail_msg
5820       if msg:
5821         _ShutdownInstanceDisks(self, instance)
5822         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5823                                  (instance.name, target_node, msg))
5824
5825
5826 class LUMigrateInstance(LogicalUnit):
5827   """Migrate an instance.
5828
5829   This is migration without shutting down, compared to the failover,
5830   which is done with shutdown.
5831
5832   """
5833   HPATH = "instance-migrate"
5834   HTYPE = constants.HTYPE_INSTANCE
5835   _OP_PARAMS = [
5836     _PInstanceName,
5837     _PMigrationMode,
5838     _PMigrationLive,
5839     ("cleanup", False, ht.TBool),
5840     ]
5841
5842   REQ_BGL = False
5843
5844   def ExpandNames(self):
5845     self._ExpandAndLockInstance()
5846
5847     self.needed_locks[locking.LEVEL_NODE] = []
5848     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5849
5850     self._migrater = TLMigrateInstance(self, self.op.instance_name,
5851                                        self.op.cleanup)
5852     self.tasklets = [self._migrater]
5853
5854   def DeclareLocks(self, level):
5855     if level == locking.LEVEL_NODE:
5856       self._LockInstancesNodes()
5857
5858   def BuildHooksEnv(self):
5859     """Build hooks env.
5860
5861     This runs on master, primary and secondary nodes of the instance.
5862
5863     """
5864     instance = self._migrater.instance
5865     source_node = instance.primary_node
5866     target_node = instance.secondary_nodes[0]
5867     env = _BuildInstanceHookEnvByObject(self, instance)
5868     env["MIGRATE_LIVE"] = self._migrater.live
5869     env["MIGRATE_CLEANUP"] = self.op.cleanup
5870     env.update({
5871         "OLD_PRIMARY": source_node,
5872         "OLD_SECONDARY": target_node,
5873         "NEW_PRIMARY": target_node,
5874         "NEW_SECONDARY": source_node,
5875         })
5876     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5877     nl_post = list(nl)
5878     nl_post.append(source_node)
5879     return env, nl, nl_post
5880
5881
5882 class LUMoveInstance(LogicalUnit):
5883   """Move an instance by data-copying.
5884
5885   """
5886   HPATH = "instance-move"
5887   HTYPE = constants.HTYPE_INSTANCE
5888   _OP_PARAMS = [
5889     _PInstanceName,
5890     ("target_node", ht.NoDefault, ht.TNonEmptyString),
5891     _PShutdownTimeout,
5892     ]
5893   REQ_BGL = False
5894
5895   def ExpandNames(self):
5896     self._ExpandAndLockInstance()
5897     target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5898     self.op.target_node = target_node
5899     self.needed_locks[locking.LEVEL_NODE] = [target_node]
5900     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5901
5902   def DeclareLocks(self, level):
5903     if level == locking.LEVEL_NODE:
5904       self._LockInstancesNodes(primary_only=True)
5905
5906   def BuildHooksEnv(self):
5907     """Build hooks env.
5908
5909     This runs on master, primary and secondary nodes of the instance.
5910
5911     """
5912     env = {
5913       "TARGET_NODE": self.op.target_node,
5914       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5915       }
5916     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5917     nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5918                                        self.op.target_node]
5919     return env, nl, nl
5920
5921   def CheckPrereq(self):
5922     """Check prerequisites.
5923
5924     This checks that the instance is in the cluster.
5925
5926     """
5927     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5928     assert self.instance is not None, \
5929       "Cannot retrieve locked instance %s" % self.op.instance_name
5930
5931     node = self.cfg.GetNodeInfo(self.op.target_node)
5932     assert node is not None, \
5933       "Cannot retrieve locked node %s" % self.op.target_node
5934
5935     self.target_node = target_node = node.name
5936
5937     if target_node == instance.primary_node:
5938       raise errors.OpPrereqError("Instance %s is already on the node %s" %
5939                                  (instance.name, target_node),
5940                                  errors.ECODE_STATE)
5941
5942     bep = self.cfg.GetClusterInfo().FillBE(instance)
5943
5944     for idx, dsk in enumerate(instance.disks):
5945       if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5946         raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5947                                    " cannot copy" % idx, errors.ECODE_STATE)
5948
5949     _CheckNodeOnline(self, target_node)
5950     _CheckNodeNotDrained(self, target_node)
5951     _CheckNodeVmCapable(self, target_node)
5952
5953     if instance.admin_up:
5954       # check memory requirements on the secondary node
5955       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5956                            instance.name, bep[constants.BE_MEMORY],
5957                            instance.hypervisor)
5958     else:
5959       self.LogInfo("Not checking memory on the secondary node as"
5960                    " instance will not be started")
5961
5962     # check bridge existance
5963     _CheckInstanceBridgesExist(self, instance, node=target_node)
5964
5965   def Exec(self, feedback_fn):
5966     """Move an instance.
5967
5968     The move is done by shutting it down on its present node, copying
5969     the data over (slow) and starting it on the new node.
5970
5971     """
5972     instance = self.instance
5973
5974     source_node = instance.primary_node
5975     target_node = self.target_node
5976
5977     self.LogInfo("Shutting down instance %s on source node %s",
5978                  instance.name, source_node)
5979
5980     result = self.rpc.call_instance_shutdown(source_node, instance,
5981                                              self.op.shutdown_timeout)
5982     msg = result.fail_msg
5983     if msg:
5984       if self.op.ignore_consistency:
5985         self.proc.LogWarning("Could not shutdown instance %s on node %s."
5986                              " Proceeding anyway. Please make sure node"
5987                              " %s is down. Error details: %s",
5988                              instance.name, source_node, source_node, msg)
5989       else:
5990         raise errors.OpExecError("Could not shutdown instance %s on"
5991                                  " node %s: %s" %
5992                                  (instance.name, source_node, msg))
5993
5994     # create the target disks
5995     try:
5996       _CreateDisks(self, instance, target_node=target_node)
5997     except errors.OpExecError:
5998       self.LogWarning("Device creation failed, reverting...")
5999       try:
6000         _RemoveDisks(self, instance, target_node=target_node)
6001       finally:
6002         self.cfg.ReleaseDRBDMinors(instance.name)
6003         raise
6004
6005     cluster_name = self.cfg.GetClusterInfo().cluster_name
6006
6007     errs = []
6008     # activate, get path, copy the data over
6009     for idx, disk in enumerate(instance.disks):
6010       self.LogInfo("Copying data for disk %d", idx)
6011       result = self.rpc.call_blockdev_assemble(target_node, disk,
6012                                                instance.name, True)
6013       if result.fail_msg:
6014         self.LogWarning("Can't assemble newly created disk %d: %s",
6015                         idx, result.fail_msg)
6016         errs.append(result.fail_msg)
6017         break
6018       dev_path = result.payload
6019       result = self.rpc.call_blockdev_export(source_node, disk,
6020                                              target_node, dev_path,
6021                                              cluster_name)
6022       if result.fail_msg:
6023         self.LogWarning("Can't copy data over for disk %d: %s",
6024                         idx, result.fail_msg)
6025         errs.append(result.fail_msg)
6026         break
6027
6028     if errs:
6029       self.LogWarning("Some disks failed to copy, aborting")
6030       try:
6031         _RemoveDisks(self, instance, target_node=target_node)
6032       finally:
6033         self.cfg.ReleaseDRBDMinors(instance.name)
6034         raise errors.OpExecError("Errors during disk copy: %s" %
6035                                  (",".join(errs),))
6036
6037     instance.primary_node = target_node
6038     self.cfg.Update(instance, feedback_fn)
6039
6040     self.LogInfo("Removing the disks on the original node")
6041     _RemoveDisks(self, instance, target_node=source_node)
6042
6043     # Only start the instance if it's marked as up
6044     if instance.admin_up:
6045       self.LogInfo("Starting instance %s on node %s",
6046                    instance.name, target_node)
6047
6048       disks_ok, _ = _AssembleInstanceDisks(self, instance,
6049                                            ignore_secondaries=True)
6050       if not disks_ok:
6051         _ShutdownInstanceDisks(self, instance)
6052         raise errors.OpExecError("Can't activate the instance's disks")
6053
6054       result = self.rpc.call_instance_start(target_node, instance, None, None)
6055       msg = result.fail_msg
6056       if msg:
6057         _ShutdownInstanceDisks(self, instance)
6058         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6059                                  (instance.name, target_node, msg))
6060
6061
6062 class LUMigrateNode(LogicalUnit):
6063   """Migrate all instances from a node.
6064
6065   """
6066   HPATH = "node-migrate"
6067   HTYPE = constants.HTYPE_NODE
6068   _OP_PARAMS = [
6069     _PNodeName,
6070     _PMigrationMode,
6071     _PMigrationLive,
6072     ]
6073   REQ_BGL = False
6074
6075   def ExpandNames(self):
6076     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6077
6078     self.needed_locks = {
6079       locking.LEVEL_NODE: [self.op.node_name],
6080       }
6081
6082     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6083
6084     # Create tasklets for migrating instances for all instances on this node
6085     names = []
6086     tasklets = []
6087
6088     for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
6089       logging.debug("Migrating instance %s", inst.name)
6090       names.append(inst.name)
6091
6092       tasklets.append(TLMigrateInstance(self, inst.name, False))
6093
6094     self.tasklets = tasklets
6095
6096     # Declare instance locks
6097     self.needed_locks[locking.LEVEL_INSTANCE] = names
6098
6099   def DeclareLocks(self, level):
6100     if level == locking.LEVEL_NODE:
6101       self._LockInstancesNodes()
6102
6103   def BuildHooksEnv(self):
6104     """Build hooks env.
6105
6106     This runs on the master, the primary and all the secondaries.
6107
6108     """
6109     env = {
6110       "NODE_NAME": self.op.node_name,
6111       }
6112
6113     nl = [self.cfg.GetMasterNode()]
6114
6115     return (env, nl, nl)
6116
6117
6118 class TLMigrateInstance(Tasklet):
6119   """Tasklet class for instance migration.
6120
6121   @type live: boolean
6122   @ivar live: whether the migration will be done live or non-live;
6123       this variable is initalized only after CheckPrereq has run
6124
6125   """
6126   def __init__(self, lu, instance_name, cleanup):
6127     """Initializes this class.
6128
6129     """
6130     Tasklet.__init__(self, lu)
6131
6132     # Parameters
6133     self.instance_name = instance_name
6134     self.cleanup = cleanup
6135     self.live = False # will be overridden later
6136
6137   def CheckPrereq(self):
6138     """Check prerequisites.
6139
6140     This checks that the instance is in the cluster.
6141
6142     """
6143     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6144     instance = self.cfg.GetInstanceInfo(instance_name)
6145     assert instance is not None
6146
6147     if instance.disk_template != constants.DT_DRBD8:
6148       raise errors.OpPrereqError("Instance's disk layout is not"
6149                                  " drbd8, cannot migrate.", errors.ECODE_STATE)
6150
6151     secondary_nodes = instance.secondary_nodes
6152     if not secondary_nodes:
6153       raise errors.ConfigurationError("No secondary node but using"
6154                                       " drbd8 disk template")
6155
6156     i_be = self.cfg.GetClusterInfo().FillBE(instance)
6157
6158     target_node = secondary_nodes[0]
6159     # check memory requirements on the secondary node
6160     _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6161                          instance.name, i_be[constants.BE_MEMORY],
6162                          instance.hypervisor)
6163
6164     # check bridge existance
6165     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6166
6167     if not self.cleanup:
6168       _CheckNodeNotDrained(self.lu, target_node)
6169       result = self.rpc.call_instance_migratable(instance.primary_node,
6170                                                  instance)
6171       result.Raise("Can't migrate, please use failover",
6172                    prereq=True, ecode=errors.ECODE_STATE)
6173
6174     self.instance = instance
6175
6176     if self.lu.op.live is not None and self.lu.op.mode is not None:
6177       raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6178                                  " parameters are accepted",
6179                                  errors.ECODE_INVAL)
6180     if self.lu.op.live is not None:
6181       if self.lu.op.live:
6182         self.lu.op.mode = constants.HT_MIGRATION_LIVE
6183       else:
6184         self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6185       # reset the 'live' parameter to None so that repeated
6186       # invocations of CheckPrereq do not raise an exception
6187       self.lu.op.live = None
6188     elif self.lu.op.mode is None:
6189       # read the default value from the hypervisor
6190       i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
6191       self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6192
6193     self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6194
6195   def _WaitUntilSync(self):
6196     """Poll with custom rpc for disk sync.
6197
6198     This uses our own step-based rpc call.
6199
6200     """
6201     self.feedback_fn("* wait until resync is done")
6202     all_done = False
6203     while not all_done:
6204       all_done = True
6205       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6206                                             self.nodes_ip,
6207                                             self.instance.disks)
6208       min_percent = 100
6209       for node, nres in result.items():
6210         nres.Raise("Cannot resync disks on node %s" % node)
6211         node_done, node_percent = nres.payload
6212         all_done = all_done and node_done
6213         if node_percent is not None:
6214           min_percent = min(min_percent, node_percent)
6215       if not all_done:
6216         if min_percent < 100:
6217           self.feedback_fn("   - progress: %.1f%%" % min_percent)
6218         time.sleep(2)
6219
6220   def _EnsureSecondary(self, node):
6221     """Demote a node to secondary.
6222
6223     """
6224     self.feedback_fn("* switching node %s to secondary mode" % node)
6225
6226     for dev in self.instance.disks:
6227       self.cfg.SetDiskID(dev, node)
6228
6229     result = self.rpc.call_blockdev_close(node, self.instance.name,
6230                                           self.instance.disks)
6231     result.Raise("Cannot change disk to secondary on node %s" % node)
6232
6233   def _GoStandalone(self):
6234     """Disconnect from the network.
6235
6236     """
6237     self.feedback_fn("* changing into standalone mode")
6238     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6239                                                self.instance.disks)
6240     for node, nres in result.items():
6241       nres.Raise("Cannot disconnect disks node %s" % node)
6242
6243   def _GoReconnect(self, multimaster):
6244     """Reconnect to the network.
6245
6246     """
6247     if multimaster:
6248       msg = "dual-master"
6249     else:
6250       msg = "single-master"
6251     self.feedback_fn("* changing disks into %s mode" % msg)
6252     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6253                                            self.instance.disks,
6254                                            self.instance.name, multimaster)
6255     for node, nres in result.items():
6256       nres.Raise("Cannot change disks config on node %s" % node)
6257
6258   def _ExecCleanup(self):
6259     """Try to cleanup after a failed migration.
6260
6261     The cleanup is done by:
6262       - check that the instance is running only on one node
6263         (and update the config if needed)
6264       - change disks on its secondary node to secondary
6265       - wait until disks are fully synchronized
6266       - disconnect from the network
6267       - change disks into single-master mode
6268       - wait again until disks are fully synchronized
6269
6270     """
6271     instance = self.instance
6272     target_node = self.target_node
6273     source_node = self.source_node
6274
6275     # check running on only one node
6276     self.feedback_fn("* checking where the instance actually runs"
6277                      " (if this hangs, the hypervisor might be in"
6278                      " a bad state)")
6279     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6280     for node, result in ins_l.items():
6281       result.Raise("Can't contact node %s" % node)
6282
6283     runningon_source = instance.name in ins_l[source_node].payload
6284     runningon_target = instance.name in ins_l[target_node].payload
6285
6286     if runningon_source and runningon_target:
6287       raise errors.OpExecError("Instance seems to be running on two nodes,"
6288                                " or the hypervisor is confused. You will have"
6289                                " to ensure manually that it runs only on one"
6290                                " and restart this operation.")
6291
6292     if not (runningon_source or runningon_target):
6293       raise errors.OpExecError("Instance does not seem to be running at all."
6294                                " In this case, it's safer to repair by"
6295                                " running 'gnt-instance stop' to ensure disk"
6296                                " shutdown, and then restarting it.")
6297
6298     if runningon_target:
6299       # the migration has actually succeeded, we need to update the config
6300       self.feedback_fn("* instance running on secondary node (%s),"
6301                        " updating config" % target_node)
6302       instance.primary_node = target_node
6303       self.cfg.Update(instance, self.feedback_fn)
6304       demoted_node = source_node
6305     else:
6306       self.feedback_fn("* instance confirmed to be running on its"
6307                        " primary node (%s)" % source_node)
6308       demoted_node = target_node
6309
6310     self._EnsureSecondary(demoted_node)
6311     try:
6312       self._WaitUntilSync()
6313     except errors.OpExecError:
6314       # we ignore here errors, since if the device is standalone, it
6315       # won't be able to sync
6316       pass
6317     self._GoStandalone()
6318     self._GoReconnect(False)
6319     self._WaitUntilSync()
6320
6321     self.feedback_fn("* done")
6322
6323   def _RevertDiskStatus(self):
6324     """Try to revert the disk status after a failed migration.
6325
6326     """
6327     target_node = self.target_node
6328     try:
6329       self._EnsureSecondary(target_node)
6330       self._GoStandalone()
6331       self._GoReconnect(False)
6332       self._WaitUntilSync()
6333     except errors.OpExecError, err:
6334       self.lu.LogWarning("Migration failed and I can't reconnect the"
6335                          " drives: error '%s'\n"
6336                          "Please look and recover the instance status" %
6337                          str(err))
6338
6339   def _AbortMigration(self):
6340     """Call the hypervisor code to abort a started migration.
6341
6342     """
6343     instance = self.instance
6344     target_node = self.target_node
6345     migration_info = self.migration_info
6346
6347     abort_result = self.rpc.call_finalize_migration(target_node,
6348                                                     instance,
6349                                                     migration_info,
6350                                                     False)
6351     abort_msg = abort_result.fail_msg
6352     if abort_msg:
6353       logging.error("Aborting migration failed on target node %s: %s",
6354                     target_node, abort_msg)
6355       # Don't raise an exception here, as we stil have to try to revert the
6356       # disk status, even if this step failed.
6357
6358   def _ExecMigration(self):
6359     """Migrate an instance.
6360
6361     The migrate is done by:
6362       - change the disks into dual-master mode
6363       - wait until disks are fully synchronized again
6364       - migrate the instance
6365       - change disks on the new secondary node (the old primary) to secondary
6366       - wait until disks are fully synchronized
6367       - change disks into single-master mode
6368
6369     """
6370     instance = self.instance
6371     target_node = self.target_node
6372     source_node = self.source_node
6373
6374     self.feedback_fn("* checking disk consistency between source and target")
6375     for dev in instance.disks:
6376       if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6377         raise errors.OpExecError("Disk %s is degraded or not fully"
6378                                  " synchronized on target node,"
6379                                  " aborting migrate." % dev.iv_name)
6380
6381     # First get the migration information from the remote node
6382     result = self.rpc.call_migration_info(source_node, instance)
6383     msg = result.fail_msg
6384     if msg:
6385       log_err = ("Failed fetching source migration information from %s: %s" %
6386                  (source_node, msg))
6387       logging.error(log_err)
6388       raise errors.OpExecError(log_err)
6389
6390     self.migration_info = migration_info = result.payload
6391
6392     # Then switch the disks to master/master mode
6393     self._EnsureSecondary(target_node)
6394     self._GoStandalone()
6395     self._GoReconnect(True)
6396     self._WaitUntilSync()
6397
6398     self.feedback_fn("* preparing %s to accept the instance" % target_node)
6399     result = self.rpc.call_accept_instance(target_node,
6400                                            instance,
6401                                            migration_info,
6402                                            self.nodes_ip[target_node])
6403
6404     msg = result.fail_msg
6405     if msg:
6406       logging.error("Instance pre-migration failed, trying to revert"
6407                     " disk status: %s", msg)
6408       self.feedback_fn("Pre-migration failed, aborting")
6409       self._AbortMigration()
6410       self._RevertDiskStatus()
6411       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6412                                (instance.name, msg))
6413
6414     self.feedback_fn("* migrating instance to %s" % target_node)
6415     time.sleep(10)
6416     result = self.rpc.call_instance_migrate(source_node, instance,
6417                                             self.nodes_ip[target_node],
6418                                             self.live)
6419     msg = result.fail_msg
6420     if msg:
6421       logging.error("Instance migration failed, trying to revert"
6422                     " disk status: %s", msg)
6423       self.feedback_fn("Migration failed, aborting")
6424       self._AbortMigration()
6425       self._RevertDiskStatus()
6426       raise errors.OpExecError("Could not migrate instance %s: %s" %
6427                                (instance.name, msg))
6428     time.sleep(10)
6429
6430     instance.primary_node = target_node
6431     # distribute new instance config to the other nodes
6432     self.cfg.Update(instance, self.feedback_fn)
6433
6434     result = self.rpc.call_finalize_migration(target_node,
6435                                               instance,
6436                                               migration_info,
6437                                               True)
6438     msg = result.fail_msg
6439     if msg:
6440       logging.error("Instance migration succeeded, but finalization failed:"
6441                     " %s", msg)
6442       raise errors.OpExecError("Could not finalize instance migration: %s" %
6443                                msg)
6444
6445     self._EnsureSecondary(source_node)
6446     self._WaitUntilSync()
6447     self._GoStandalone()
6448     self._GoReconnect(False)
6449     self._WaitUntilSync()
6450
6451     self.feedback_fn("* done")
6452
6453   def Exec(self, feedback_fn):
6454     """Perform the migration.
6455
6456     """
6457     feedback_fn("Migrating instance %s" % self.instance.name)
6458
6459     self.feedback_fn = feedback_fn
6460
6461     self.source_node = self.instance.primary_node
6462     self.target_node = self.instance.secondary_nodes[0]
6463     self.all_nodes = [self.source_node, self.target_node]
6464     self.nodes_ip = {
6465       self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6466       self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6467       }
6468
6469     if self.cleanup:
6470       return self._ExecCleanup()
6471     else:
6472       return self._ExecMigration()
6473
6474
6475 def _CreateBlockDev(lu, node, instance, device, force_create,
6476                     info, force_open):
6477   """Create a tree of block devices on a given node.
6478
6479   If this device type has to be created on secondaries, create it and
6480   all its children.
6481
6482   If not, just recurse to children keeping the same 'force' value.
6483
6484   @param lu: the lu on whose behalf we execute
6485   @param node: the node on which to create the device
6486   @type instance: L{objects.Instance}
6487   @param instance: the instance which owns the device
6488   @type device: L{objects.Disk}
6489   @param device: the device to create
6490   @type force_create: boolean
6491   @param force_create: whether to force creation of this device; this
6492       will be change to True whenever we find a device which has
6493       CreateOnSecondary() attribute
6494   @param info: the extra 'metadata' we should attach to the device
6495       (this will be represented as a LVM tag)
6496   @type force_open: boolean
6497   @param force_open: this parameter will be passes to the
6498       L{backend.BlockdevCreate} function where it specifies
6499       whether we run on primary or not, and it affects both
6500       the child assembly and the device own Open() execution
6501
6502   """
6503   if device.CreateOnSecondary():
6504     force_create = True
6505
6506   if device.children:
6507     for child in device.children:
6508       _CreateBlockDev(lu, node, instance, child, force_create,
6509                       info, force_open)
6510
6511   if not force_create:
6512     return
6513
6514   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6515
6516
6517 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6518   """Create a single block device on a given node.
6519
6520   This will not recurse over children of the device, so they must be
6521   created in advance.
6522
6523   @param lu: the lu on whose behalf we execute
6524   @param node: the node on which to create the device
6525   @type instance: L{objects.Instance}
6526   @param instance: the instance which owns the device
6527   @type device: L{objects.Disk}
6528   @param device: the device to create
6529   @param info: the extra 'metadata' we should attach to the device
6530       (this will be represented as a LVM tag)
6531   @type force_open: boolean
6532   @param force_open: this parameter will be passes to the
6533       L{backend.BlockdevCreate} function where it specifies
6534       whether we run on primary or not, and it affects both
6535       the child assembly and the device own Open() execution
6536
6537   """
6538   lu.cfg.SetDiskID(device, node)
6539   result = lu.rpc.call_blockdev_create(node, device, device.size,
6540                                        instance.name, force_open, info)
6541   result.Raise("Can't create block device %s on"
6542                " node %s for instance %s" % (device, node, instance.name))
6543   if device.physical_id is None:
6544     device.physical_id = result.payload
6545
6546
6547 def _GenerateUniqueNames(lu, exts):
6548   """Generate a suitable LV name.
6549
6550   This will generate a logical volume name for the given instance.
6551
6552   """
6553   results = []
6554   for val in exts:
6555     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6556     results.append("%s%s" % (new_id, val))
6557   return results
6558
6559
6560 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgname, names, iv_name,
6561                          p_minor, s_minor):
6562   """Generate a drbd8 device complete with its children.
6563
6564   """
6565   port = lu.cfg.AllocatePort()
6566   shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6567   dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6568                           logical_id=(vgname, names[0]))
6569   dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6570                           logical_id=(vgname, names[1]))
6571   drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6572                           logical_id=(primary, secondary, port,
6573                                       p_minor, s_minor,
6574                                       shared_secret),
6575                           children=[dev_data, dev_meta],
6576                           iv_name=iv_name)
6577   return drbd_dev
6578
6579
6580 def _GenerateDiskTemplate(lu, template_name,
6581                           instance_name, primary_node,
6582                           secondary_nodes, disk_info,
6583                           file_storage_dir, file_driver,
6584                           base_index, feedback_fn):
6585   """Generate the entire disk layout for a given template type.
6586
6587   """
6588   #TODO: compute space requirements
6589
6590   vgname = lu.cfg.GetVGName()
6591   disk_count = len(disk_info)
6592   disks = []
6593   if template_name == constants.DT_DISKLESS:
6594     pass
6595   elif template_name == constants.DT_PLAIN:
6596     if len(secondary_nodes) != 0:
6597       raise errors.ProgrammerError("Wrong template configuration")
6598
6599     names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6600                                       for i in range(disk_count)])
6601     for idx, disk in enumerate(disk_info):
6602       disk_index = idx + base_index
6603       vg = disk.get("vg", vgname)
6604       feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
6605       disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6606                               logical_id=(vg, names[idx]),
6607                               iv_name="disk/%d" % disk_index,
6608                               mode=disk["mode"])
6609       disks.append(disk_dev)
6610   elif template_name == constants.DT_DRBD8:
6611     if len(secondary_nodes) != 1:
6612       raise errors.ProgrammerError("Wrong template configuration")
6613     remote_node = secondary_nodes[0]
6614     minors = lu.cfg.AllocateDRBDMinor(
6615       [primary_node, remote_node] * len(disk_info), instance_name)
6616
6617     names = []
6618     for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6619                                                for i in range(disk_count)]):
6620       names.append(lv_prefix + "_data")
6621       names.append(lv_prefix + "_meta")
6622     for idx, disk in enumerate(disk_info):
6623       disk_index = idx + base_index
6624       vg = disk.get("vg", vgname)
6625       disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6626                                       disk["size"], vg, names[idx*2:idx*2+2],
6627                                       "disk/%d" % disk_index,
6628                                       minors[idx*2], minors[idx*2+1])
6629       disk_dev.mode = disk["mode"]
6630       disks.append(disk_dev)
6631   elif template_name == constants.DT_FILE:
6632     if len(secondary_nodes) != 0:
6633       raise errors.ProgrammerError("Wrong template configuration")
6634
6635     _RequireFileStorage()
6636
6637     for idx, disk in enumerate(disk_info):
6638       disk_index = idx + base_index
6639       disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6640                               iv_name="disk/%d" % disk_index,
6641                               logical_id=(file_driver,
6642                                           "%s/disk%d" % (file_storage_dir,
6643                                                          disk_index)),
6644                               mode=disk["mode"])
6645       disks.append(disk_dev)
6646   else:
6647     raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6648   return disks
6649
6650
6651 def _GetInstanceInfoText(instance):
6652   """Compute that text that should be added to the disk's metadata.
6653
6654   """
6655   return "originstname+%s" % instance.name
6656
6657
6658 def _CalcEta(time_taken, written, total_size):
6659   """Calculates the ETA based on size written and total size.
6660
6661   @param time_taken: The time taken so far
6662   @param written: amount written so far
6663   @param total_size: The total size of data to be written
6664   @return: The remaining time in seconds
6665
6666   """
6667   avg_time = time_taken / float(written)
6668   return (total_size - written) * avg_time
6669
6670
6671 def _WipeDisks(lu, instance):
6672   """Wipes instance disks.
6673
6674   @type lu: L{LogicalUnit}
6675   @param lu: the logical unit on whose behalf we execute
6676   @type instance: L{objects.Instance}
6677   @param instance: the instance whose disks we should create
6678   @return: the success of the wipe
6679
6680   """
6681   node = instance.primary_node
6682   for idx, device in enumerate(instance.disks):
6683     lu.LogInfo("* Wiping disk %d", idx)
6684     logging.info("Wiping disk %d for instance %s", idx, instance.name)
6685
6686     # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
6687     # MAX_WIPE_CHUNK at max
6688     wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
6689                           constants.MIN_WIPE_CHUNK_PERCENT)
6690
6691     offset = 0
6692     size = device.size
6693     last_output = 0
6694     start_time = time.time()
6695
6696     while offset < size:
6697       wipe_size = min(wipe_chunk_size, size - offset)
6698       result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
6699       result.Raise("Could not wipe disk %d at offset %d for size %d" %
6700                    (idx, offset, wipe_size))
6701       now = time.time()
6702       offset += wipe_size
6703       if now - last_output >= 60:
6704         eta = _CalcEta(now - start_time, offset, size)
6705         lu.LogInfo(" - done: %.1f%% ETA: %s" %
6706                    (offset / float(size) * 100, utils.FormatSeconds(eta)))
6707         last_output = now
6708
6709
6710 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6711   """Create all disks for an instance.
6712
6713   This abstracts away some work from AddInstance.
6714
6715   @type lu: L{LogicalUnit}
6716   @param lu: the logical unit on whose behalf we execute
6717   @type instance: L{objects.Instance}
6718   @param instance: the instance whose disks we should create
6719   @type to_skip: list
6720   @param to_skip: list of indices to skip
6721   @type target_node: string
6722   @param target_node: if passed, overrides the target node for creation
6723   @rtype: boolean
6724   @return: the success of the creation
6725
6726   """
6727   info = _GetInstanceInfoText(instance)
6728   if target_node is None:
6729     pnode = instance.primary_node
6730     all_nodes = instance.all_nodes
6731   else:
6732     pnode = target_node
6733     all_nodes = [pnode]
6734
6735   if instance.disk_template == constants.DT_FILE:
6736     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6737     result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6738
6739     result.Raise("Failed to create directory '%s' on"
6740                  " node %s" % (file_storage_dir, pnode))
6741
6742   # Note: this needs to be kept in sync with adding of disks in
6743   # LUSetInstanceParams
6744   for idx, device in enumerate(instance.disks):
6745     if to_skip and idx in to_skip:
6746       continue
6747     logging.info("Creating volume %s for instance %s",
6748                  device.iv_name, instance.name)
6749     #HARDCODE
6750     for node in all_nodes:
6751       f_create = node == pnode
6752       _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6753
6754
6755 def _RemoveDisks(lu, instance, target_node=None):
6756   """Remove all disks for an instance.
6757
6758   This abstracts away some work from `AddInstance()` and
6759   `RemoveInstance()`. Note that in case some of the devices couldn't
6760   be removed, the removal will continue with the other ones (compare
6761   with `_CreateDisks()`).
6762
6763   @type lu: L{LogicalUnit}
6764   @param lu: the logical unit on whose behalf we execute
6765   @type instance: L{objects.Instance}
6766   @param instance: the instance whose disks we should remove
6767   @type target_node: string
6768   @param target_node: used to override the node on which to remove the disks
6769   @rtype: boolean
6770   @return: the success of the removal
6771
6772   """
6773   logging.info("Removing block devices for instance %s", instance.name)
6774
6775   all_result = True
6776   for device in instance.disks:
6777     if target_node:
6778       edata = [(target_node, device)]
6779     else:
6780       edata = device.ComputeNodeTree(instance.primary_node)
6781     for node, disk in edata:
6782       lu.cfg.SetDiskID(disk, node)
6783       msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6784       if msg:
6785         lu.LogWarning("Could not remove block device %s on node %s,"
6786                       " continuing anyway: %s", device.iv_name, node, msg)
6787         all_result = False
6788
6789   if instance.disk_template == constants.DT_FILE:
6790     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6791     if target_node:
6792       tgt = target_node
6793     else:
6794       tgt = instance.primary_node
6795     result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6796     if result.fail_msg:
6797       lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6798                     file_storage_dir, instance.primary_node, result.fail_msg)
6799       all_result = False
6800
6801   return all_result
6802
6803
6804 def _ComputeDiskSizePerVG(disk_template, disks):
6805   """Compute disk size requirements in the volume group
6806
6807   """
6808   def _compute(disks, payload):
6809     """Universal algorithm
6810
6811     """
6812     vgs = {}
6813     for disk in disks:
6814       vgs[disk["vg"]] = vgs.get("vg", 0) + disk["size"] + payload
6815
6816     return vgs
6817
6818   # Required free disk space as a function of disk and swap space
6819   req_size_dict = {
6820     constants.DT_DISKLESS: None,
6821     constants.DT_PLAIN: _compute(disks, 0),
6822     # 128 MB are added for drbd metadata for each disk
6823     constants.DT_DRBD8: _compute(disks, 128),
6824     constants.DT_FILE: None,
6825   }
6826
6827   if disk_template not in req_size_dict:
6828     raise errors.ProgrammerError("Disk template '%s' size requirement"
6829                                  " is unknown" %  disk_template)
6830
6831   return req_size_dict[disk_template]
6832
6833 def _ComputeDiskSize(disk_template, disks):
6834   """Compute disk size requirements in the volume group
6835
6836   """
6837   # Required free disk space as a function of disk and swap space
6838   req_size_dict = {
6839     constants.DT_DISKLESS: None,
6840     constants.DT_PLAIN: sum(d["size"] for d in disks),
6841     # 128 MB are added for drbd metadata for each disk
6842     constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6843     constants.DT_FILE: None,
6844   }
6845
6846   if disk_template not in req_size_dict:
6847     raise errors.ProgrammerError("Disk template '%s' size requirement"
6848                                  " is unknown" %  disk_template)
6849
6850   return req_size_dict[disk_template]
6851
6852
6853 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6854   """Hypervisor parameter validation.
6855
6856   This function abstract the hypervisor parameter validation to be
6857   used in both instance create and instance modify.
6858
6859   @type lu: L{LogicalUnit}
6860   @param lu: the logical unit for which we check
6861   @type nodenames: list
6862   @param nodenames: the list of nodes on which we should check
6863   @type hvname: string
6864   @param hvname: the name of the hypervisor we should use
6865   @type hvparams: dict
6866   @param hvparams: the parameters which we need to check
6867   @raise errors.OpPrereqError: if the parameters are not valid
6868
6869   """
6870   hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6871                                                   hvname,
6872                                                   hvparams)
6873   for node in nodenames:
6874     info = hvinfo[node]
6875     if info.offline:
6876       continue
6877     info.Raise("Hypervisor parameter validation failed on node %s" % node)
6878
6879
6880 def _CheckOSParams(lu, required, nodenames, osname, osparams):
6881   """OS parameters validation.
6882
6883   @type lu: L{LogicalUnit}
6884   @param lu: the logical unit for which we check
6885   @type required: boolean
6886   @param required: whether the validation should fail if the OS is not
6887       found
6888   @type nodenames: list
6889   @param nodenames: the list of nodes on which we should check
6890   @type osname: string
6891   @param osname: the name of the hypervisor we should use
6892   @type osparams: dict
6893   @param osparams: the parameters which we need to check
6894   @raise errors.OpPrereqError: if the parameters are not valid
6895
6896   """
6897   result = lu.rpc.call_os_validate(required, nodenames, osname,
6898                                    [constants.OS_VALIDATE_PARAMETERS],
6899                                    osparams)
6900   for node, nres in result.items():
6901     # we don't check for offline cases since this should be run only
6902     # against the master node and/or an instance's nodes
6903     nres.Raise("OS Parameters validation failed on node %s" % node)
6904     if not nres.payload:
6905       lu.LogInfo("OS %s not found on node %s, validation skipped",
6906                  osname, node)
6907
6908
6909 class LUCreateInstance(LogicalUnit):
6910   """Create an instance.
6911
6912   """
6913   HPATH = "instance-add"
6914   HTYPE = constants.HTYPE_INSTANCE
6915   _OP_PARAMS = [
6916     _PInstanceName,
6917     ("mode", ht.NoDefault, ht.TElemOf(constants.INSTANCE_CREATE_MODES)),
6918     ("start", True, ht.TBool),
6919     ("wait_for_sync", True, ht.TBool),
6920     ("ip_check", True, ht.TBool),
6921     ("name_check", True, ht.TBool),
6922     ("disks", ht.NoDefault, ht.TListOf(ht.TDict)),
6923     ("nics", ht.NoDefault, ht.TListOf(ht.TDict)),
6924     ("hvparams", ht.EmptyDict, ht.TDict),
6925     ("beparams", ht.EmptyDict, ht.TDict),
6926     ("osparams", ht.EmptyDict, ht.TDict),
6927     ("no_install", None, ht.TMaybeBool),
6928     ("os_type", None, ht.TMaybeString),
6929     ("force_variant", False, ht.TBool),
6930     ("source_handshake", None, ht.TOr(ht.TList, ht.TNone)),
6931     ("source_x509_ca", None, ht.TMaybeString),
6932     ("source_instance_name", None, ht.TMaybeString),
6933     ("src_node", None, ht.TMaybeString),
6934     ("src_path", None, ht.TMaybeString),
6935     ("pnode", None, ht.TMaybeString),
6936     ("snode", None, ht.TMaybeString),
6937     ("iallocator", None, ht.TMaybeString),
6938     ("hypervisor", None, ht.TMaybeString),
6939     ("disk_template", ht.NoDefault, _CheckDiskTemplate),
6940     ("identify_defaults", False, ht.TBool),
6941     ("file_driver", None, ht.TOr(ht.TNone, ht.TElemOf(constants.FILE_DRIVER))),
6942     ("file_storage_dir", None, ht.TMaybeString),
6943     ]
6944   REQ_BGL = False
6945
6946   def CheckArguments(self):
6947     """Check arguments.
6948
6949     """
6950     # do not require name_check to ease forward/backward compatibility
6951     # for tools
6952     if self.op.no_install and self.op.start:
6953       self.LogInfo("No-installation mode selected, disabling startup")
6954       self.op.start = False
6955     # validate/normalize the instance name
6956     self.op.instance_name = \
6957       netutils.Hostname.GetNormalizedName(self.op.instance_name)
6958
6959     if self.op.ip_check and not self.op.name_check:
6960       # TODO: make the ip check more flexible and not depend on the name check
6961       raise errors.OpPrereqError("Cannot do ip check without a name check",
6962                                  errors.ECODE_INVAL)
6963
6964     # check nics' parameter names
6965     for nic in self.op.nics:
6966       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6967
6968     # check disks. parameter names and consistent adopt/no-adopt strategy
6969     has_adopt = has_no_adopt = False
6970     for disk in self.op.disks:
6971       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6972       if "adopt" in disk:
6973         has_adopt = True
6974       else:
6975         has_no_adopt = True
6976     if has_adopt and has_no_adopt:
6977       raise errors.OpPrereqError("Either all disks are adopted or none is",
6978                                  errors.ECODE_INVAL)
6979     if has_adopt:
6980       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6981         raise errors.OpPrereqError("Disk adoption is not supported for the"
6982                                    " '%s' disk template" %
6983                                    self.op.disk_template,
6984                                    errors.ECODE_INVAL)
6985       if self.op.iallocator is not None:
6986         raise errors.OpPrereqError("Disk adoption not allowed with an"
6987                                    " iallocator script", errors.ECODE_INVAL)
6988       if self.op.mode == constants.INSTANCE_IMPORT:
6989         raise errors.OpPrereqError("Disk adoption not allowed for"
6990                                    " instance import", errors.ECODE_INVAL)
6991
6992     self.adopt_disks = has_adopt
6993
6994     # instance name verification
6995     if self.op.name_check:
6996       self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
6997       self.op.instance_name = self.hostname1.name
6998       # used in CheckPrereq for ip ping check
6999       self.check_ip = self.hostname1.ip
7000     else:
7001       self.check_ip = None
7002
7003     # file storage checks
7004     if (self.op.file_driver and
7005         not self.op.file_driver in constants.FILE_DRIVER):
7006       raise errors.OpPrereqError("Invalid file driver name '%s'" %
7007                                  self.op.file_driver, errors.ECODE_INVAL)
7008
7009     if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
7010       raise errors.OpPrereqError("File storage directory path not absolute",
7011                                  errors.ECODE_INVAL)
7012
7013     ### Node/iallocator related checks
7014     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
7015
7016     if self.op.pnode is not None:
7017       if self.op.disk_template in constants.DTS_NET_MIRROR:
7018         if self.op.snode is None:
7019           raise errors.OpPrereqError("The networked disk templates need"
7020                                      " a mirror node", errors.ECODE_INVAL)
7021       elif self.op.snode:
7022         self.LogWarning("Secondary node will be ignored on non-mirrored disk"
7023                         " template")
7024         self.op.snode = None
7025
7026     self._cds = _GetClusterDomainSecret()
7027
7028     if self.op.mode == constants.INSTANCE_IMPORT:
7029       # On import force_variant must be True, because if we forced it at
7030       # initial install, our only chance when importing it back is that it
7031       # works again!
7032       self.op.force_variant = True
7033
7034       if self.op.no_install:
7035         self.LogInfo("No-installation mode has no effect during import")
7036
7037     elif self.op.mode == constants.INSTANCE_CREATE:
7038       if self.op.os_type is None:
7039         raise errors.OpPrereqError("No guest OS specified",
7040                                    errors.ECODE_INVAL)
7041       if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7042         raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7043                                    " installation" % self.op.os_type,
7044                                    errors.ECODE_STATE)
7045       if self.op.disk_template is None:
7046         raise errors.OpPrereqError("No disk template specified",
7047                                    errors.ECODE_INVAL)
7048
7049     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7050       # Check handshake to ensure both clusters have the same domain secret
7051       src_handshake = self.op.source_handshake
7052       if not src_handshake:
7053         raise errors.OpPrereqError("Missing source handshake",
7054                                    errors.ECODE_INVAL)
7055
7056       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7057                                                            src_handshake)
7058       if errmsg:
7059         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7060                                    errors.ECODE_INVAL)
7061
7062       # Load and check source CA
7063       self.source_x509_ca_pem = self.op.source_x509_ca
7064       if not self.source_x509_ca_pem:
7065         raise errors.OpPrereqError("Missing source X509 CA",
7066                                    errors.ECODE_INVAL)
7067
7068       try:
7069         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7070                                                     self._cds)
7071       except OpenSSL.crypto.Error, err:
7072         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7073                                    (err, ), errors.ECODE_INVAL)
7074
7075       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7076       if errcode is not None:
7077         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7078                                    errors.ECODE_INVAL)
7079
7080       self.source_x509_ca = cert
7081
7082       src_instance_name = self.op.source_instance_name
7083       if not src_instance_name:
7084         raise errors.OpPrereqError("Missing source instance name",
7085                                    errors.ECODE_INVAL)
7086
7087       self.source_instance_name = \
7088           netutils.GetHostname(name=src_instance_name).name
7089
7090     else:
7091       raise errors.OpPrereqError("Invalid instance creation mode %r" %
7092                                  self.op.mode, errors.ECODE_INVAL)
7093
7094   def ExpandNames(self):
7095     """ExpandNames for CreateInstance.
7096
7097     Figure out the right locks for instance creation.
7098
7099     """
7100     self.needed_locks = {}
7101
7102     instance_name = self.op.instance_name
7103     # this is just a preventive check, but someone might still add this
7104     # instance in the meantime, and creation will fail at lock-add time
7105     if instance_name in self.cfg.GetInstanceList():
7106       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7107                                  instance_name, errors.ECODE_EXISTS)
7108
7109     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7110
7111     if self.op.iallocator:
7112       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7113     else:
7114       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7115       nodelist = [self.op.pnode]
7116       if self.op.snode is not None:
7117         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7118         nodelist.append(self.op.snode)
7119       self.needed_locks[locking.LEVEL_NODE] = nodelist
7120
7121     # in case of import lock the source node too
7122     if self.op.mode == constants.INSTANCE_IMPORT:
7123       src_node = self.op.src_node
7124       src_path = self.op.src_path
7125
7126       if src_path is None:
7127         self.op.src_path = src_path = self.op.instance_name
7128
7129       if src_node is None:
7130         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7131         self.op.src_node = None
7132         if os.path.isabs(src_path):
7133           raise errors.OpPrereqError("Importing an instance from an absolute"
7134                                      " path requires a source node option.",
7135                                      errors.ECODE_INVAL)
7136       else:
7137         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7138         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7139           self.needed_locks[locking.LEVEL_NODE].append(src_node)
7140         if not os.path.isabs(src_path):
7141           self.op.src_path = src_path = \
7142             utils.PathJoin(constants.EXPORT_DIR, src_path)
7143
7144   def _RunAllocator(self):
7145     """Run the allocator based on input opcode.
7146
7147     """
7148     nics = [n.ToDict() for n in self.nics]
7149     ial = IAllocator(self.cfg, self.rpc,
7150                      mode=constants.IALLOCATOR_MODE_ALLOC,
7151                      name=self.op.instance_name,
7152                      disk_template=self.op.disk_template,
7153                      tags=[],
7154                      os=self.op.os_type,
7155                      vcpus=self.be_full[constants.BE_VCPUS],
7156                      mem_size=self.be_full[constants.BE_MEMORY],
7157                      disks=self.disks,
7158                      nics=nics,
7159                      hypervisor=self.op.hypervisor,
7160                      )
7161
7162     ial.Run(self.op.iallocator)
7163
7164     if not ial.success:
7165       raise errors.OpPrereqError("Can't compute nodes using"
7166                                  " iallocator '%s': %s" %
7167                                  (self.op.iallocator, ial.info),
7168                                  errors.ECODE_NORES)
7169     if len(ial.result) != ial.required_nodes:
7170       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7171                                  " of nodes (%s), required %s" %
7172                                  (self.op.iallocator, len(ial.result),
7173                                   ial.required_nodes), errors.ECODE_FAULT)
7174     self.op.pnode = ial.result[0]
7175     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7176                  self.op.instance_name, self.op.iallocator,
7177                  utils.CommaJoin(ial.result))
7178     if ial.required_nodes == 2:
7179       self.op.snode = ial.result[1]
7180
7181   def BuildHooksEnv(self):
7182     """Build hooks env.
7183
7184     This runs on master, primary and secondary nodes of the instance.
7185
7186     """
7187     env = {
7188       "ADD_MODE": self.op.mode,
7189       }
7190     if self.op.mode == constants.INSTANCE_IMPORT:
7191       env["SRC_NODE"] = self.op.src_node
7192       env["SRC_PATH"] = self.op.src_path
7193       env["SRC_IMAGES"] = self.src_images
7194
7195     env.update(_BuildInstanceHookEnv(
7196       name=self.op.instance_name,
7197       primary_node=self.op.pnode,
7198       secondary_nodes=self.secondaries,
7199       status=self.op.start,
7200       os_type=self.op.os_type,
7201       memory=self.be_full[constants.BE_MEMORY],
7202       vcpus=self.be_full[constants.BE_VCPUS],
7203       nics=_NICListToTuple(self, self.nics),
7204       disk_template=self.op.disk_template,
7205       disks=[(d["size"], d["mode"]) for d in self.disks],
7206       bep=self.be_full,
7207       hvp=self.hv_full,
7208       hypervisor_name=self.op.hypervisor,
7209     ))
7210
7211     nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
7212           self.secondaries)
7213     return env, nl, nl
7214
7215   def _ReadExportInfo(self):
7216     """Reads the export information from disk.
7217
7218     It will override the opcode source node and path with the actual
7219     information, if these two were not specified before.
7220
7221     @return: the export information
7222
7223     """
7224     assert self.op.mode == constants.INSTANCE_IMPORT
7225
7226     src_node = self.op.src_node
7227     src_path = self.op.src_path
7228
7229     if src_node is None:
7230       locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7231       exp_list = self.rpc.call_export_list(locked_nodes)
7232       found = False
7233       for node in exp_list:
7234         if exp_list[node].fail_msg:
7235           continue
7236         if src_path in exp_list[node].payload:
7237           found = True
7238           self.op.src_node = src_node = node
7239           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7240                                                        src_path)
7241           break
7242       if not found:
7243         raise errors.OpPrereqError("No export found for relative path %s" %
7244                                     src_path, errors.ECODE_INVAL)
7245
7246     _CheckNodeOnline(self, src_node)
7247     result = self.rpc.call_export_info(src_node, src_path)
7248     result.Raise("No export or invalid export found in dir %s" % src_path)
7249
7250     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7251     if not export_info.has_section(constants.INISECT_EXP):
7252       raise errors.ProgrammerError("Corrupted export config",
7253                                    errors.ECODE_ENVIRON)
7254
7255     ei_version = export_info.get(constants.INISECT_EXP, "version")
7256     if (int(ei_version) != constants.EXPORT_VERSION):
7257       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7258                                  (ei_version, constants.EXPORT_VERSION),
7259                                  errors.ECODE_ENVIRON)
7260     return export_info
7261
7262   def _ReadExportParams(self, einfo):
7263     """Use export parameters as defaults.
7264
7265     In case the opcode doesn't specify (as in override) some instance
7266     parameters, then try to use them from the export information, if
7267     that declares them.
7268
7269     """
7270     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7271
7272     if self.op.disk_template is None:
7273       if einfo.has_option(constants.INISECT_INS, "disk_template"):
7274         self.op.disk_template = einfo.get(constants.INISECT_INS,
7275                                           "disk_template")
7276       else:
7277         raise errors.OpPrereqError("No disk template specified and the export"
7278                                    " is missing the disk_template information",
7279                                    errors.ECODE_INVAL)
7280
7281     if not self.op.disks:
7282       if einfo.has_option(constants.INISECT_INS, "disk_count"):
7283         disks = []
7284         # TODO: import the disk iv_name too
7285         for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7286           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7287           disks.append({"size": disk_sz})
7288         self.op.disks = disks
7289       else:
7290         raise errors.OpPrereqError("No disk info specified and the export"
7291                                    " is missing the disk information",
7292                                    errors.ECODE_INVAL)
7293
7294     if (not self.op.nics and
7295         einfo.has_option(constants.INISECT_INS, "nic_count")):
7296       nics = []
7297       for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7298         ndict = {}
7299         for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7300           v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7301           ndict[name] = v
7302         nics.append(ndict)
7303       self.op.nics = nics
7304
7305     if (self.op.hypervisor is None and
7306         einfo.has_option(constants.INISECT_INS, "hypervisor")):
7307       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7308     if einfo.has_section(constants.INISECT_HYP):
7309       # use the export parameters but do not override the ones
7310       # specified by the user
7311       for name, value in einfo.items(constants.INISECT_HYP):
7312         if name not in self.op.hvparams:
7313           self.op.hvparams[name] = value
7314
7315     if einfo.has_section(constants.INISECT_BEP):
7316       # use the parameters, without overriding
7317       for name, value in einfo.items(constants.INISECT_BEP):
7318         if name not in self.op.beparams:
7319           self.op.beparams[name] = value
7320     else:
7321       # try to read the parameters old style, from the main section
7322       for name in constants.BES_PARAMETERS:
7323         if (name not in self.op.beparams and
7324             einfo.has_option(constants.INISECT_INS, name)):
7325           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7326
7327     if einfo.has_section(constants.INISECT_OSP):
7328       # use the parameters, without overriding
7329       for name, value in einfo.items(constants.INISECT_OSP):
7330         if name not in self.op.osparams:
7331           self.op.osparams[name] = value
7332
7333   def _RevertToDefaults(self, cluster):
7334     """Revert the instance parameters to the default values.
7335
7336     """
7337     # hvparams
7338     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7339     for name in self.op.hvparams.keys():
7340       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7341         del self.op.hvparams[name]
7342     # beparams
7343     be_defs = cluster.SimpleFillBE({})
7344     for name in self.op.beparams.keys():
7345       if name in be_defs and be_defs[name] == self.op.beparams[name]:
7346         del self.op.beparams[name]
7347     # nic params
7348     nic_defs = cluster.SimpleFillNIC({})
7349     for nic in self.op.nics:
7350       for name in constants.NICS_PARAMETERS:
7351         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7352           del nic[name]
7353     # osparams
7354     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7355     for name in self.op.osparams.keys():
7356       if name in os_defs and os_defs[name] == self.op.osparams[name]:
7357         del self.op.osparams[name]
7358
7359   def CheckPrereq(self):
7360     """Check prerequisites.
7361
7362     """
7363     if self.op.mode == constants.INSTANCE_IMPORT:
7364       export_info = self._ReadExportInfo()
7365       self._ReadExportParams(export_info)
7366
7367     _CheckDiskTemplate(self.op.disk_template)
7368
7369     if (not self.cfg.GetVGName() and
7370         self.op.disk_template not in constants.DTS_NOT_LVM):
7371       raise errors.OpPrereqError("Cluster does not support lvm-based"
7372                                  " instances", errors.ECODE_STATE)
7373
7374     if self.op.hypervisor is None:
7375       self.op.hypervisor = self.cfg.GetHypervisorType()
7376
7377     cluster = self.cfg.GetClusterInfo()
7378     enabled_hvs = cluster.enabled_hypervisors
7379     if self.op.hypervisor not in enabled_hvs:
7380       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7381                                  " cluster (%s)" % (self.op.hypervisor,
7382                                   ",".join(enabled_hvs)),
7383                                  errors.ECODE_STATE)
7384
7385     # check hypervisor parameter syntax (locally)
7386     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7387     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7388                                       self.op.hvparams)
7389     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7390     hv_type.CheckParameterSyntax(filled_hvp)
7391     self.hv_full = filled_hvp
7392     # check that we don't specify global parameters on an instance
7393     _CheckGlobalHvParams(self.op.hvparams)
7394
7395     # fill and remember the beparams dict
7396     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7397     self.be_full = cluster.SimpleFillBE(self.op.beparams)
7398
7399     # build os parameters
7400     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7401
7402     # now that hvp/bep are in final format, let's reset to defaults,
7403     # if told to do so
7404     if self.op.identify_defaults:
7405       self._RevertToDefaults(cluster)
7406
7407     # NIC buildup
7408     self.nics = []
7409     for idx, nic in enumerate(self.op.nics):
7410       nic_mode_req = nic.get("mode", None)
7411       nic_mode = nic_mode_req
7412       if nic_mode is None:
7413         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7414
7415       # in routed mode, for the first nic, the default ip is 'auto'
7416       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7417         default_ip_mode = constants.VALUE_AUTO
7418       else:
7419         default_ip_mode = constants.VALUE_NONE
7420
7421       # ip validity checks
7422       ip = nic.get("ip", default_ip_mode)
7423       if ip is None or ip.lower() == constants.VALUE_NONE:
7424         nic_ip = None
7425       elif ip.lower() == constants.VALUE_AUTO:
7426         if not self.op.name_check:
7427           raise errors.OpPrereqError("IP address set to auto but name checks"
7428                                      " have been skipped",
7429                                      errors.ECODE_INVAL)
7430         nic_ip = self.hostname1.ip
7431       else:
7432         if not netutils.IPAddress.IsValid(ip):
7433           raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7434                                      errors.ECODE_INVAL)
7435         nic_ip = ip
7436
7437       # TODO: check the ip address for uniqueness
7438       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7439         raise errors.OpPrereqError("Routed nic mode requires an ip address",
7440                                    errors.ECODE_INVAL)
7441
7442       # MAC address verification
7443       mac = nic.get("mac", constants.VALUE_AUTO)
7444       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7445         mac = utils.NormalizeAndValidateMac(mac)
7446
7447         try:
7448           self.cfg.ReserveMAC(mac, self.proc.GetECId())
7449         except errors.ReservationError:
7450           raise errors.OpPrereqError("MAC address %s already in use"
7451                                      " in cluster" % mac,
7452                                      errors.ECODE_NOTUNIQUE)
7453
7454       # bridge verification
7455       bridge = nic.get("bridge", None)
7456       link = nic.get("link", None)
7457       if bridge and link:
7458         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7459                                    " at the same time", errors.ECODE_INVAL)
7460       elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7461         raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7462                                    errors.ECODE_INVAL)
7463       elif bridge:
7464         link = bridge
7465
7466       nicparams = {}
7467       if nic_mode_req:
7468         nicparams[constants.NIC_MODE] = nic_mode_req
7469       if link:
7470         nicparams[constants.NIC_LINK] = link
7471
7472       check_params = cluster.SimpleFillNIC(nicparams)
7473       objects.NIC.CheckParameterSyntax(check_params)
7474       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7475
7476     # disk checks/pre-build
7477     self.disks = []
7478     for disk in self.op.disks:
7479       mode = disk.get("mode", constants.DISK_RDWR)
7480       if mode not in constants.DISK_ACCESS_SET:
7481         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7482                                    mode, errors.ECODE_INVAL)
7483       size = disk.get("size", None)
7484       if size is None:
7485         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7486       try:
7487         size = int(size)
7488       except (TypeError, ValueError):
7489         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7490                                    errors.ECODE_INVAL)
7491       vg = disk.get("vg", self.cfg.GetVGName())
7492       new_disk = {"size": size, "mode": mode, "vg": vg}
7493       if "adopt" in disk:
7494         new_disk["adopt"] = disk["adopt"]
7495       self.disks.append(new_disk)
7496
7497     if self.op.mode == constants.INSTANCE_IMPORT:
7498
7499       # Check that the new instance doesn't have less disks than the export
7500       instance_disks = len(self.disks)
7501       export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7502       if instance_disks < export_disks:
7503         raise errors.OpPrereqError("Not enough disks to import."
7504                                    " (instance: %d, export: %d)" %
7505                                    (instance_disks, export_disks),
7506                                    errors.ECODE_INVAL)
7507
7508       disk_images = []
7509       for idx in range(export_disks):
7510         option = 'disk%d_dump' % idx
7511         if export_info.has_option(constants.INISECT_INS, option):
7512           # FIXME: are the old os-es, disk sizes, etc. useful?
7513           export_name = export_info.get(constants.INISECT_INS, option)
7514           image = utils.PathJoin(self.op.src_path, export_name)
7515           disk_images.append(image)
7516         else:
7517           disk_images.append(False)
7518
7519       self.src_images = disk_images
7520
7521       old_name = export_info.get(constants.INISECT_INS, 'name')
7522       try:
7523         exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7524       except (TypeError, ValueError), err:
7525         raise errors.OpPrereqError("Invalid export file, nic_count is not"
7526                                    " an integer: %s" % str(err),
7527                                    errors.ECODE_STATE)
7528       if self.op.instance_name == old_name:
7529         for idx, nic in enumerate(self.nics):
7530           if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7531             nic_mac_ini = 'nic%d_mac' % idx
7532             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7533
7534     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7535
7536     # ip ping checks (we use the same ip that was resolved in ExpandNames)
7537     if self.op.ip_check:
7538       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7539         raise errors.OpPrereqError("IP %s of instance %s already in use" %
7540                                    (self.check_ip, self.op.instance_name),
7541                                    errors.ECODE_NOTUNIQUE)
7542
7543     #### mac address generation
7544     # By generating here the mac address both the allocator and the hooks get
7545     # the real final mac address rather than the 'auto' or 'generate' value.
7546     # There is a race condition between the generation and the instance object
7547     # creation, which means that we know the mac is valid now, but we're not
7548     # sure it will be when we actually add the instance. If things go bad
7549     # adding the instance will abort because of a duplicate mac, and the
7550     # creation job will fail.
7551     for nic in self.nics:
7552       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7553         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7554
7555     #### allocator run
7556
7557     if self.op.iallocator is not None:
7558       self._RunAllocator()
7559
7560     #### node related checks
7561
7562     # check primary node
7563     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7564     assert self.pnode is not None, \
7565       "Cannot retrieve locked node %s" % self.op.pnode
7566     if pnode.offline:
7567       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7568                                  pnode.name, errors.ECODE_STATE)
7569     if pnode.drained:
7570       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7571                                  pnode.name, errors.ECODE_STATE)
7572     if not pnode.vm_capable:
7573       raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
7574                                  " '%s'" % pnode.name, errors.ECODE_STATE)
7575
7576     self.secondaries = []
7577
7578     # mirror node verification
7579     if self.op.disk_template in constants.DTS_NET_MIRROR:
7580       if self.op.snode == pnode.name:
7581         raise errors.OpPrereqError("The secondary node cannot be the"
7582                                    " primary node.", errors.ECODE_INVAL)
7583       _CheckNodeOnline(self, self.op.snode)
7584       _CheckNodeNotDrained(self, self.op.snode)
7585       _CheckNodeVmCapable(self, self.op.snode)
7586       self.secondaries.append(self.op.snode)
7587
7588     nodenames = [pnode.name] + self.secondaries
7589
7590     if not self.adopt_disks:
7591       # Check lv size requirements, if not adopting
7592       req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
7593       _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
7594
7595     else: # instead, we must check the adoption data
7596       all_lvs = set([i["adopt"] for i in self.disks])
7597       if len(all_lvs) != len(self.disks):
7598         raise errors.OpPrereqError("Duplicate volume names given for adoption",
7599                                    errors.ECODE_INVAL)
7600       for lv_name in all_lvs:
7601         try:
7602           # FIXME: VG must be provided here. Else all LVs with the
7603           # same name will be locked on all VGs.
7604           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7605         except errors.ReservationError:
7606           raise errors.OpPrereqError("LV named %s used by another instance" %
7607                                      lv_name, errors.ECODE_NOTUNIQUE)
7608
7609       node_lvs = self.rpc.call_lv_list([pnode.name],
7610                                        self.cfg.GetVGName())[pnode.name]
7611       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7612       node_lvs = node_lvs.payload
7613       delta = all_lvs.difference(node_lvs.keys())
7614       if delta:
7615         raise errors.OpPrereqError("Missing logical volume(s): %s" %
7616                                    utils.CommaJoin(delta),
7617                                    errors.ECODE_INVAL)
7618       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7619       if online_lvs:
7620         raise errors.OpPrereqError("Online logical volumes found, cannot"
7621                                    " adopt: %s" % utils.CommaJoin(online_lvs),
7622                                    errors.ECODE_STATE)
7623       # update the size of disk based on what is found
7624       for dsk in self.disks:
7625         dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
7626
7627     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7628
7629     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7630     # check OS parameters (remotely)
7631     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7632
7633     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7634
7635     # memory check on primary node
7636     if self.op.start:
7637       _CheckNodeFreeMemory(self, self.pnode.name,
7638                            "creating instance %s" % self.op.instance_name,
7639                            self.be_full[constants.BE_MEMORY],
7640                            self.op.hypervisor)
7641
7642     self.dry_run_result = list(nodenames)
7643
7644   def Exec(self, feedback_fn):
7645     """Create and add the instance to the cluster.
7646
7647     """
7648     instance = self.op.instance_name
7649     pnode_name = self.pnode.name
7650
7651     ht_kind = self.op.hypervisor
7652     if ht_kind in constants.HTS_REQ_PORT:
7653       network_port = self.cfg.AllocatePort()
7654     else:
7655       network_port = None
7656
7657     if constants.ENABLE_FILE_STORAGE:
7658       # this is needed because os.path.join does not accept None arguments
7659       if self.op.file_storage_dir is None:
7660         string_file_storage_dir = ""
7661       else:
7662         string_file_storage_dir = self.op.file_storage_dir
7663
7664       # build the full file storage dir path
7665       file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7666                                         string_file_storage_dir, instance)
7667     else:
7668       file_storage_dir = ""
7669
7670     disks = _GenerateDiskTemplate(self,
7671                                   self.op.disk_template,
7672                                   instance, pnode_name,
7673                                   self.secondaries,
7674                                   self.disks,
7675                                   file_storage_dir,
7676                                   self.op.file_driver,
7677                                   0,
7678                                   feedback_fn)
7679
7680     iobj = objects.Instance(name=instance, os=self.op.os_type,
7681                             primary_node=pnode_name,
7682                             nics=self.nics, disks=disks,
7683                             disk_template=self.op.disk_template,
7684                             admin_up=False,
7685                             network_port=network_port,
7686                             beparams=self.op.beparams,
7687                             hvparams=self.op.hvparams,
7688                             hypervisor=self.op.hypervisor,
7689                             osparams=self.op.osparams,
7690                             )
7691
7692     if self.adopt_disks:
7693       # rename LVs to the newly-generated names; we need to construct
7694       # 'fake' LV disks with the old data, plus the new unique_id
7695       tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7696       rename_to = []
7697       for t_dsk, a_dsk in zip (tmp_disks, self.disks):
7698         rename_to.append(t_dsk.logical_id)
7699         t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7700         self.cfg.SetDiskID(t_dsk, pnode_name)
7701       result = self.rpc.call_blockdev_rename(pnode_name,
7702                                              zip(tmp_disks, rename_to))
7703       result.Raise("Failed to rename adoped LVs")
7704     else:
7705       feedback_fn("* creating instance disks...")
7706       try:
7707         _CreateDisks(self, iobj)
7708       except errors.OpExecError:
7709         self.LogWarning("Device creation failed, reverting...")
7710         try:
7711           _RemoveDisks(self, iobj)
7712         finally:
7713           self.cfg.ReleaseDRBDMinors(instance)
7714           raise
7715
7716       if self.cfg.GetClusterInfo().prealloc_wipe_disks:
7717         feedback_fn("* wiping instance disks...")
7718         try:
7719           _WipeDisks(self, iobj)
7720         except errors.OpExecError:
7721           self.LogWarning("Device wiping failed, reverting...")
7722           try:
7723             _RemoveDisks(self, iobj)
7724           finally:
7725             self.cfg.ReleaseDRBDMinors(instance)
7726             raise
7727
7728     feedback_fn("adding instance %s to cluster config" % instance)
7729
7730     self.cfg.AddInstance(iobj, self.proc.GetECId())
7731
7732     # Declare that we don't want to remove the instance lock anymore, as we've
7733     # added the instance to the config
7734     del self.remove_locks[locking.LEVEL_INSTANCE]
7735     # Unlock all the nodes
7736     if self.op.mode == constants.INSTANCE_IMPORT:
7737       nodes_keep = [self.op.src_node]
7738       nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7739                        if node != self.op.src_node]
7740       self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7741       self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7742     else:
7743       self.context.glm.release(locking.LEVEL_NODE)
7744       del self.acquired_locks[locking.LEVEL_NODE]
7745
7746     if self.op.wait_for_sync:
7747       disk_abort = not _WaitForSync(self, iobj)
7748     elif iobj.disk_template in constants.DTS_NET_MIRROR:
7749       # make sure the disks are not degraded (still sync-ing is ok)
7750       time.sleep(15)
7751       feedback_fn("* checking mirrors status")
7752       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7753     else:
7754       disk_abort = False
7755
7756     if disk_abort:
7757       _RemoveDisks(self, iobj)
7758       self.cfg.RemoveInstance(iobj.name)
7759       # Make sure the instance lock gets removed
7760       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7761       raise errors.OpExecError("There are some degraded disks for"
7762                                " this instance")
7763
7764     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7765       if self.op.mode == constants.INSTANCE_CREATE:
7766         if not self.op.no_install:
7767           feedback_fn("* running the instance OS create scripts...")
7768           # FIXME: pass debug option from opcode to backend
7769           result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7770                                                  self.op.debug_level)
7771           result.Raise("Could not add os for instance %s"
7772                        " on node %s" % (instance, pnode_name))
7773
7774       elif self.op.mode == constants.INSTANCE_IMPORT:
7775         feedback_fn("* running the instance OS import scripts...")
7776
7777         transfers = []
7778
7779         for idx, image in enumerate(self.src_images):
7780           if not image:
7781             continue
7782
7783           # FIXME: pass debug option from opcode to backend
7784           dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7785                                              constants.IEIO_FILE, (image, ),
7786                                              constants.IEIO_SCRIPT,
7787                                              (iobj.disks[idx], idx),
7788                                              None)
7789           transfers.append(dt)
7790
7791         import_result = \
7792           masterd.instance.TransferInstanceData(self, feedback_fn,
7793                                                 self.op.src_node, pnode_name,
7794                                                 self.pnode.secondary_ip,
7795                                                 iobj, transfers)
7796         if not compat.all(import_result):
7797           self.LogWarning("Some disks for instance %s on node %s were not"
7798                           " imported successfully" % (instance, pnode_name))
7799
7800       elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7801         feedback_fn("* preparing remote import...")
7802         connect_timeout = constants.RIE_CONNECT_TIMEOUT
7803         timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7804
7805         disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
7806                                                      self.source_x509_ca,
7807                                                      self._cds, timeouts)
7808         if not compat.all(disk_results):
7809           # TODO: Should the instance still be started, even if some disks
7810           # failed to import (valid for local imports, too)?
7811           self.LogWarning("Some disks for instance %s on node %s were not"
7812                           " imported successfully" % (instance, pnode_name))
7813
7814         # Run rename script on newly imported instance
7815         assert iobj.name == instance
7816         feedback_fn("Running rename script for %s" % instance)
7817         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7818                                                    self.source_instance_name,
7819                                                    self.op.debug_level)
7820         if result.fail_msg:
7821           self.LogWarning("Failed to run rename script for %s on node"
7822                           " %s: %s" % (instance, pnode_name, result.fail_msg))
7823
7824       else:
7825         # also checked in the prereq part
7826         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7827                                      % self.op.mode)
7828
7829     if self.op.start:
7830       iobj.admin_up = True
7831       self.cfg.Update(iobj, feedback_fn)
7832       logging.info("Starting instance %s on node %s", instance, pnode_name)
7833       feedback_fn("* starting instance...")
7834       result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7835       result.Raise("Could not start instance")
7836
7837     return list(iobj.all_nodes)
7838
7839
7840 class LUConnectConsole(NoHooksLU):
7841   """Connect to an instance's console.
7842
7843   This is somewhat special in that it returns the command line that
7844   you need to run on the master node in order to connect to the
7845   console.
7846
7847   """
7848   _OP_PARAMS = [
7849     _PInstanceName
7850     ]
7851   REQ_BGL = False
7852
7853   def ExpandNames(self):
7854     self._ExpandAndLockInstance()
7855
7856   def CheckPrereq(self):
7857     """Check prerequisites.
7858
7859     This checks that the instance is in the cluster.
7860
7861     """
7862     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7863     assert self.instance is not None, \
7864       "Cannot retrieve locked instance %s" % self.op.instance_name
7865     _CheckNodeOnline(self, self.instance.primary_node)
7866
7867   def Exec(self, feedback_fn):
7868     """Connect to the console of an instance
7869
7870     """
7871     instance = self.instance
7872     node = instance.primary_node
7873
7874     node_insts = self.rpc.call_instance_list([node],
7875                                              [instance.hypervisor])[node]
7876     node_insts.Raise("Can't get node information from %s" % node)
7877
7878     if instance.name not in node_insts.payload:
7879       if instance.admin_up:
7880         state = "ERROR_down"
7881       else:
7882         state = "ADMIN_down"
7883       raise errors.OpExecError("Instance %s is not running (state %s)" %
7884                                (instance.name, state))
7885
7886     logging.debug("Connecting to console of %s on %s", instance.name, node)
7887
7888     hyper = hypervisor.GetHypervisor(instance.hypervisor)
7889     cluster = self.cfg.GetClusterInfo()
7890     # beparams and hvparams are passed separately, to avoid editing the
7891     # instance and then saving the defaults in the instance itself.
7892     hvparams = cluster.FillHV(instance)
7893     beparams = cluster.FillBE(instance)
7894     console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
7895
7896     # build ssh cmdline
7897     return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
7898
7899
7900 class LUReplaceDisks(LogicalUnit):
7901   """Replace the disks of an instance.
7902
7903   """
7904   HPATH = "mirrors-replace"
7905   HTYPE = constants.HTYPE_INSTANCE
7906   _OP_PARAMS = [
7907     _PInstanceName,
7908     ("mode", ht.NoDefault, ht.TElemOf(constants.REPLACE_MODES)),
7909     ("disks", ht.EmptyList, ht.TListOf(ht.TPositiveInt)),
7910     ("remote_node", None, ht.TMaybeString),
7911     ("iallocator", None, ht.TMaybeString),
7912     ("early_release", False, ht.TBool),
7913     ]
7914   REQ_BGL = False
7915
7916   def CheckArguments(self):
7917     TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7918                                   self.op.iallocator)
7919
7920   def ExpandNames(self):
7921     self._ExpandAndLockInstance()
7922
7923     if self.op.iallocator is not None:
7924       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7925
7926     elif self.op.remote_node is not None:
7927       remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7928       self.op.remote_node = remote_node
7929
7930       # Warning: do not remove the locking of the new secondary here
7931       # unless DRBD8.AddChildren is changed to work in parallel;
7932       # currently it doesn't since parallel invocations of
7933       # FindUnusedMinor will conflict
7934       self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7935       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7936
7937     else:
7938       self.needed_locks[locking.LEVEL_NODE] = []
7939       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7940
7941     self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7942                                    self.op.iallocator, self.op.remote_node,
7943                                    self.op.disks, False, self.op.early_release)
7944
7945     self.tasklets = [self.replacer]
7946
7947   def DeclareLocks(self, level):
7948     # If we're not already locking all nodes in the set we have to declare the
7949     # instance's primary/secondary nodes.
7950     if (level == locking.LEVEL_NODE and
7951         self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7952       self._LockInstancesNodes()
7953
7954   def BuildHooksEnv(self):
7955     """Build hooks env.
7956
7957     This runs on the master, the primary and all the secondaries.
7958
7959     """
7960     instance = self.replacer.instance
7961     env = {
7962       "MODE": self.op.mode,
7963       "NEW_SECONDARY": self.op.remote_node,
7964       "OLD_SECONDARY": instance.secondary_nodes[0],
7965       }
7966     env.update(_BuildInstanceHookEnvByObject(self, instance))
7967     nl = [
7968       self.cfg.GetMasterNode(),
7969       instance.primary_node,
7970       ]
7971     if self.op.remote_node is not None:
7972       nl.append(self.op.remote_node)
7973     return env, nl, nl
7974
7975
7976 class TLReplaceDisks(Tasklet):
7977   """Replaces disks for an instance.
7978
7979   Note: Locking is not within the scope of this class.
7980
7981   """
7982   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7983                disks, delay_iallocator, early_release):
7984     """Initializes this class.
7985
7986     """
7987     Tasklet.__init__(self, lu)
7988
7989     # Parameters
7990     self.instance_name = instance_name
7991     self.mode = mode
7992     self.iallocator_name = iallocator_name
7993     self.remote_node = remote_node
7994     self.disks = disks
7995     self.delay_iallocator = delay_iallocator
7996     self.early_release = early_release
7997
7998     # Runtime data
7999     self.instance = None
8000     self.new_node = None
8001     self.target_node = None
8002     self.other_node = None
8003     self.remote_node_info = None
8004     self.node_secondary_ip = None
8005
8006   @staticmethod
8007   def CheckArguments(mode, remote_node, iallocator):
8008     """Helper function for users of this class.
8009
8010     """
8011     # check for valid parameter combination
8012     if mode == constants.REPLACE_DISK_CHG:
8013       if remote_node is None and iallocator is None:
8014         raise errors.OpPrereqError("When changing the secondary either an"
8015                                    " iallocator script must be used or the"
8016                                    " new node given", errors.ECODE_INVAL)
8017
8018       if remote_node is not None and iallocator is not None:
8019         raise errors.OpPrereqError("Give either the iallocator or the new"
8020                                    " secondary, not both", errors.ECODE_INVAL)
8021
8022     elif remote_node is not None or iallocator is not None:
8023       # Not replacing the secondary
8024       raise errors.OpPrereqError("The iallocator and new node options can"
8025                                  " only be used when changing the"
8026                                  " secondary node", errors.ECODE_INVAL)
8027
8028   @staticmethod
8029   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8030     """Compute a new secondary node using an IAllocator.
8031
8032     """
8033     ial = IAllocator(lu.cfg, lu.rpc,
8034                      mode=constants.IALLOCATOR_MODE_RELOC,
8035                      name=instance_name,
8036                      relocate_from=relocate_from)
8037
8038     ial.Run(iallocator_name)
8039
8040     if not ial.success:
8041       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
8042                                  " %s" % (iallocator_name, ial.info),
8043                                  errors.ECODE_NORES)
8044
8045     if len(ial.result) != ial.required_nodes:
8046       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8047                                  " of nodes (%s), required %s" %
8048                                  (iallocator_name,
8049                                   len(ial.result), ial.required_nodes),
8050                                  errors.ECODE_FAULT)
8051
8052     remote_node_name = ial.result[0]
8053
8054     lu.LogInfo("Selected new secondary for instance '%s': %s",
8055                instance_name, remote_node_name)
8056
8057     return remote_node_name
8058
8059   def _FindFaultyDisks(self, node_name):
8060     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
8061                                     node_name, True)
8062
8063   def CheckPrereq(self):
8064     """Check prerequisites.
8065
8066     This checks that the instance is in the cluster.
8067
8068     """
8069     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
8070     assert instance is not None, \
8071       "Cannot retrieve locked instance %s" % self.instance_name
8072
8073     if instance.disk_template != constants.DT_DRBD8:
8074       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
8075                                  " instances", errors.ECODE_INVAL)
8076
8077     if len(instance.secondary_nodes) != 1:
8078       raise errors.OpPrereqError("The instance has a strange layout,"
8079                                  " expected one secondary but found %d" %
8080                                  len(instance.secondary_nodes),
8081                                  errors.ECODE_FAULT)
8082
8083     if not self.delay_iallocator:
8084       self._CheckPrereq2()
8085
8086   def _CheckPrereq2(self):
8087     """Check prerequisites, second part.
8088
8089     This function should always be part of CheckPrereq. It was separated and is
8090     now called from Exec because during node evacuation iallocator was only
8091     called with an unmodified cluster model, not taking planned changes into
8092     account.
8093
8094     """
8095     instance = self.instance
8096     secondary_node = instance.secondary_nodes[0]
8097
8098     if self.iallocator_name is None:
8099       remote_node = self.remote_node
8100     else:
8101       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
8102                                        instance.name, instance.secondary_nodes)
8103
8104     if remote_node is not None:
8105       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
8106       assert self.remote_node_info is not None, \
8107         "Cannot retrieve locked node %s" % remote_node
8108     else:
8109       self.remote_node_info = None
8110
8111     if remote_node == self.instance.primary_node:
8112       raise errors.OpPrereqError("The specified node is the primary node of"
8113                                  " the instance.", errors.ECODE_INVAL)
8114
8115     if remote_node == secondary_node:
8116       raise errors.OpPrereqError("The specified node is already the"
8117                                  " secondary node of the instance.",
8118                                  errors.ECODE_INVAL)
8119
8120     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
8121                                     constants.REPLACE_DISK_CHG):
8122       raise errors.OpPrereqError("Cannot specify disks to be replaced",
8123                                  errors.ECODE_INVAL)
8124
8125     if self.mode == constants.REPLACE_DISK_AUTO:
8126       faulty_primary = self._FindFaultyDisks(instance.primary_node)
8127       faulty_secondary = self._FindFaultyDisks(secondary_node)
8128
8129       if faulty_primary and faulty_secondary:
8130         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
8131                                    " one node and can not be repaired"
8132                                    " automatically" % self.instance_name,
8133                                    errors.ECODE_STATE)
8134
8135       if faulty_primary:
8136         self.disks = faulty_primary
8137         self.target_node = instance.primary_node
8138         self.other_node = secondary_node
8139         check_nodes = [self.target_node, self.other_node]
8140       elif faulty_secondary:
8141         self.disks = faulty_secondary
8142         self.target_node = secondary_node
8143         self.other_node = instance.primary_node
8144         check_nodes = [self.target_node, self.other_node]
8145       else:
8146         self.disks = []
8147         check_nodes = []
8148
8149     else:
8150       # Non-automatic modes
8151       if self.mode == constants.REPLACE_DISK_PRI:
8152         self.target_node = instance.primary_node
8153         self.other_node = secondary_node
8154         check_nodes = [self.target_node, self.other_node]
8155
8156       elif self.mode == constants.REPLACE_DISK_SEC:
8157         self.target_node = secondary_node
8158         self.other_node = instance.primary_node
8159         check_nodes = [self.target_node, self.other_node]
8160
8161       elif self.mode == constants.REPLACE_DISK_CHG:
8162         self.new_node = remote_node
8163         self.other_node = instance.primary_node
8164         self.target_node = secondary_node
8165         check_nodes = [self.new_node, self.other_node]
8166
8167         _CheckNodeNotDrained(self.lu, remote_node)
8168         _CheckNodeVmCapable(self.lu, remote_node)
8169
8170         old_node_info = self.cfg.GetNodeInfo(secondary_node)
8171         assert old_node_info is not None
8172         if old_node_info.offline and not self.early_release:
8173           # doesn't make sense to delay the release
8174           self.early_release = True
8175           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8176                           " early-release mode", secondary_node)
8177
8178       else:
8179         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8180                                      self.mode)
8181
8182       # If not specified all disks should be replaced
8183       if not self.disks:
8184         self.disks = range(len(self.instance.disks))
8185
8186     for node in check_nodes:
8187       _CheckNodeOnline(self.lu, node)
8188
8189     # Check whether disks are valid
8190     for disk_idx in self.disks:
8191       instance.FindDisk(disk_idx)
8192
8193     # Get secondary node IP addresses
8194     node_2nd_ip = {}
8195
8196     for node_name in [self.target_node, self.other_node, self.new_node]:
8197       if node_name is not None:
8198         node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
8199
8200     self.node_secondary_ip = node_2nd_ip
8201
8202   def Exec(self, feedback_fn):
8203     """Execute disk replacement.
8204
8205     This dispatches the disk replacement to the appropriate handler.
8206
8207     """
8208     if self.delay_iallocator:
8209       self._CheckPrereq2()
8210
8211     if not self.disks:
8212       feedback_fn("No disks need replacement")
8213       return
8214
8215     feedback_fn("Replacing disk(s) %s for %s" %
8216                 (utils.CommaJoin(self.disks), self.instance.name))
8217
8218     activate_disks = (not self.instance.admin_up)
8219
8220     # Activate the instance disks if we're replacing them on a down instance
8221     if activate_disks:
8222       _StartInstanceDisks(self.lu, self.instance, True)
8223
8224     try:
8225       # Should we replace the secondary node?
8226       if self.new_node is not None:
8227         fn = self._ExecDrbd8Secondary
8228       else:
8229         fn = self._ExecDrbd8DiskOnly
8230
8231       return fn(feedback_fn)
8232
8233     finally:
8234       # Deactivate the instance disks if we're replacing them on a
8235       # down instance
8236       if activate_disks:
8237         _SafeShutdownInstanceDisks(self.lu, self.instance)
8238
8239   def _CheckVolumeGroup(self, nodes):
8240     self.lu.LogInfo("Checking volume groups")
8241
8242     vgname = self.cfg.GetVGName()
8243
8244     # Make sure volume group exists on all involved nodes
8245     results = self.rpc.call_vg_list(nodes)
8246     if not results:
8247       raise errors.OpExecError("Can't list volume groups on the nodes")
8248
8249     for node in nodes:
8250       res = results[node]
8251       res.Raise("Error checking node %s" % node)
8252       if vgname not in res.payload:
8253         raise errors.OpExecError("Volume group '%s' not found on node %s" %
8254                                  (vgname, node))
8255
8256   def _CheckDisksExistence(self, nodes):
8257     # Check disk existence
8258     for idx, dev in enumerate(self.instance.disks):
8259       if idx not in self.disks:
8260         continue
8261
8262       for node in nodes:
8263         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8264         self.cfg.SetDiskID(dev, node)
8265
8266         result = self.rpc.call_blockdev_find(node, dev)
8267
8268         msg = result.fail_msg
8269         if msg or not result.payload:
8270           if not msg:
8271             msg = "disk not found"
8272           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8273                                    (idx, node, msg))
8274
8275   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8276     for idx, dev in enumerate(self.instance.disks):
8277       if idx not in self.disks:
8278         continue
8279
8280       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8281                       (idx, node_name))
8282
8283       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8284                                    ldisk=ldisk):
8285         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8286                                  " replace disks for instance %s" %
8287                                  (node_name, self.instance.name))
8288
8289   def _CreateNewStorage(self, node_name):
8290     vgname = self.cfg.GetVGName()
8291     iv_names = {}
8292
8293     for idx, dev in enumerate(self.instance.disks):
8294       if idx not in self.disks:
8295         continue
8296
8297       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8298
8299       self.cfg.SetDiskID(dev, node_name)
8300
8301       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8302       names = _GenerateUniqueNames(self.lu, lv_names)
8303
8304       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8305                              logical_id=(vgname, names[0]))
8306       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8307                              logical_id=(vgname, names[1]))
8308
8309       new_lvs = [lv_data, lv_meta]
8310       old_lvs = dev.children
8311       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
8312
8313       # we pass force_create=True to force the LVM creation
8314       for new_lv in new_lvs:
8315         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8316                         _GetInstanceInfoText(self.instance), False)
8317
8318     return iv_names
8319
8320   def _CheckDevices(self, node_name, iv_names):
8321     for name, (dev, _, _) in iv_names.iteritems():
8322       self.cfg.SetDiskID(dev, node_name)
8323
8324       result = self.rpc.call_blockdev_find(node_name, dev)
8325
8326       msg = result.fail_msg
8327       if msg or not result.payload:
8328         if not msg:
8329           msg = "disk not found"
8330         raise errors.OpExecError("Can't find DRBD device %s: %s" %
8331                                  (name, msg))
8332
8333       if result.payload.is_degraded:
8334         raise errors.OpExecError("DRBD device %s is degraded!" % name)
8335
8336   def _RemoveOldStorage(self, node_name, iv_names):
8337     for name, (_, old_lvs, _) in iv_names.iteritems():
8338       self.lu.LogInfo("Remove logical volumes for %s" % name)
8339
8340       for lv in old_lvs:
8341         self.cfg.SetDiskID(lv, node_name)
8342
8343         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8344         if msg:
8345           self.lu.LogWarning("Can't remove old LV: %s" % msg,
8346                              hint="remove unused LVs manually")
8347
8348   def _ReleaseNodeLock(self, node_name):
8349     """Releases the lock for a given node."""
8350     self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
8351
8352   def _ExecDrbd8DiskOnly(self, feedback_fn):
8353     """Replace a disk on the primary or secondary for DRBD 8.
8354
8355     The algorithm for replace is quite complicated:
8356
8357       1. for each disk to be replaced:
8358
8359         1. create new LVs on the target node with unique names
8360         1. detach old LVs from the drbd device
8361         1. rename old LVs to name_replaced.<time_t>
8362         1. rename new LVs to old LVs
8363         1. attach the new LVs (with the old names now) to the drbd device
8364
8365       1. wait for sync across all devices
8366
8367       1. for each modified disk:
8368
8369         1. remove old LVs (which have the name name_replaces.<time_t>)
8370
8371     Failures are not very well handled.
8372
8373     """
8374     steps_total = 6
8375
8376     # Step: check device activation
8377     self.lu.LogStep(1, steps_total, "Check device existence")
8378     self._CheckDisksExistence([self.other_node, self.target_node])
8379     self._CheckVolumeGroup([self.target_node, self.other_node])
8380
8381     # Step: check other node consistency
8382     self.lu.LogStep(2, steps_total, "Check peer consistency")
8383     self._CheckDisksConsistency(self.other_node,
8384                                 self.other_node == self.instance.primary_node,
8385                                 False)
8386
8387     # Step: create new storage
8388     self.lu.LogStep(3, steps_total, "Allocate new storage")
8389     iv_names = self._CreateNewStorage(self.target_node)
8390
8391     # Step: for each lv, detach+rename*2+attach
8392     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8393     for dev, old_lvs, new_lvs in iv_names.itervalues():
8394       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8395
8396       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8397                                                      old_lvs)
8398       result.Raise("Can't detach drbd from local storage on node"
8399                    " %s for device %s" % (self.target_node, dev.iv_name))
8400       #dev.children = []
8401       #cfg.Update(instance)
8402
8403       # ok, we created the new LVs, so now we know we have the needed
8404       # storage; as such, we proceed on the target node to rename
8405       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8406       # using the assumption that logical_id == physical_id (which in
8407       # turn is the unique_id on that node)
8408
8409       # FIXME(iustin): use a better name for the replaced LVs
8410       temp_suffix = int(time.time())
8411       ren_fn = lambda d, suff: (d.physical_id[0],
8412                                 d.physical_id[1] + "_replaced-%s" % suff)
8413
8414       # Build the rename list based on what LVs exist on the node
8415       rename_old_to_new = []
8416       for to_ren in old_lvs:
8417         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8418         if not result.fail_msg and result.payload:
8419           # device exists
8420           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8421
8422       self.lu.LogInfo("Renaming the old LVs on the target node")
8423       result = self.rpc.call_blockdev_rename(self.target_node,
8424                                              rename_old_to_new)
8425       result.Raise("Can't rename old LVs on node %s" % self.target_node)
8426
8427       # Now we rename the new LVs to the old LVs
8428       self.lu.LogInfo("Renaming the new LVs on the target node")
8429       rename_new_to_old = [(new, old.physical_id)
8430                            for old, new in zip(old_lvs, new_lvs)]
8431       result = self.rpc.call_blockdev_rename(self.target_node,
8432                                              rename_new_to_old)
8433       result.Raise("Can't rename new LVs on node %s" % self.target_node)
8434
8435       for old, new in zip(old_lvs, new_lvs):
8436         new.logical_id = old.logical_id
8437         self.cfg.SetDiskID(new, self.target_node)
8438
8439       for disk in old_lvs:
8440         disk.logical_id = ren_fn(disk, temp_suffix)
8441         self.cfg.SetDiskID(disk, self.target_node)
8442
8443       # Now that the new lvs have the old name, we can add them to the device
8444       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8445       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8446                                                   new_lvs)
8447       msg = result.fail_msg
8448       if msg:
8449         for new_lv in new_lvs:
8450           msg2 = self.rpc.call_blockdev_remove(self.target_node,
8451                                                new_lv).fail_msg
8452           if msg2:
8453             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8454                                hint=("cleanup manually the unused logical"
8455                                      "volumes"))
8456         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8457
8458       dev.children = new_lvs
8459
8460       self.cfg.Update(self.instance, feedback_fn)
8461
8462     cstep = 5
8463     if self.early_release:
8464       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8465       cstep += 1
8466       self._RemoveOldStorage(self.target_node, iv_names)
8467       # WARNING: we release both node locks here, do not do other RPCs
8468       # than WaitForSync to the primary node
8469       self._ReleaseNodeLock([self.target_node, self.other_node])
8470
8471     # Wait for sync
8472     # This can fail as the old devices are degraded and _WaitForSync
8473     # does a combined result over all disks, so we don't check its return value
8474     self.lu.LogStep(cstep, steps_total, "Sync devices")
8475     cstep += 1
8476     _WaitForSync(self.lu, self.instance)
8477
8478     # Check all devices manually
8479     self._CheckDevices(self.instance.primary_node, iv_names)
8480
8481     # Step: remove old storage
8482     if not self.early_release:
8483       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8484       cstep += 1
8485       self._RemoveOldStorage(self.target_node, iv_names)
8486
8487   def _ExecDrbd8Secondary(self, feedback_fn):
8488     """Replace the secondary node for DRBD 8.
8489
8490     The algorithm for replace is quite complicated:
8491       - for all disks of the instance:
8492         - create new LVs on the new node with same names
8493         - shutdown the drbd device on the old secondary
8494         - disconnect the drbd network on the primary
8495         - create the drbd device on the new secondary
8496         - network attach the drbd on the primary, using an artifice:
8497           the drbd code for Attach() will connect to the network if it
8498           finds a device which is connected to the good local disks but
8499           not network enabled
8500       - wait for sync across all devices
8501       - remove all disks from the old secondary
8502
8503     Failures are not very well handled.
8504
8505     """
8506     steps_total = 6
8507
8508     # Step: check device activation
8509     self.lu.LogStep(1, steps_total, "Check device existence")
8510     self._CheckDisksExistence([self.instance.primary_node])
8511     self._CheckVolumeGroup([self.instance.primary_node])
8512
8513     # Step: check other node consistency
8514     self.lu.LogStep(2, steps_total, "Check peer consistency")
8515     self._CheckDisksConsistency(self.instance.primary_node, True, True)
8516
8517     # Step: create new storage
8518     self.lu.LogStep(3, steps_total, "Allocate new storage")
8519     for idx, dev in enumerate(self.instance.disks):
8520       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8521                       (self.new_node, idx))
8522       # we pass force_create=True to force LVM creation
8523       for new_lv in dev.children:
8524         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8525                         _GetInstanceInfoText(self.instance), False)
8526
8527     # Step 4: dbrd minors and drbd setups changes
8528     # after this, we must manually remove the drbd minors on both the
8529     # error and the success paths
8530     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8531     minors = self.cfg.AllocateDRBDMinor([self.new_node
8532                                          for dev in self.instance.disks],
8533                                         self.instance.name)
8534     logging.debug("Allocated minors %r", minors)
8535
8536     iv_names = {}
8537     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8538       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8539                       (self.new_node, idx))
8540       # create new devices on new_node; note that we create two IDs:
8541       # one without port, so the drbd will be activated without
8542       # networking information on the new node at this stage, and one
8543       # with network, for the latter activation in step 4
8544       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8545       if self.instance.primary_node == o_node1:
8546         p_minor = o_minor1
8547       else:
8548         assert self.instance.primary_node == o_node2, "Three-node instance?"
8549         p_minor = o_minor2
8550
8551       new_alone_id = (self.instance.primary_node, self.new_node, None,
8552                       p_minor, new_minor, o_secret)
8553       new_net_id = (self.instance.primary_node, self.new_node, o_port,
8554                     p_minor, new_minor, o_secret)
8555
8556       iv_names[idx] = (dev, dev.children, new_net_id)
8557       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8558                     new_net_id)
8559       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8560                               logical_id=new_alone_id,
8561                               children=dev.children,
8562                               size=dev.size)
8563       try:
8564         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8565                               _GetInstanceInfoText(self.instance), False)
8566       except errors.GenericError:
8567         self.cfg.ReleaseDRBDMinors(self.instance.name)
8568         raise
8569
8570     # We have new devices, shutdown the drbd on the old secondary
8571     for idx, dev in enumerate(self.instance.disks):
8572       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8573       self.cfg.SetDiskID(dev, self.target_node)
8574       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8575       if msg:
8576         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8577                            "node: %s" % (idx, msg),
8578                            hint=("Please cleanup this device manually as"
8579                                  " soon as possible"))
8580
8581     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8582     result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8583                                                self.node_secondary_ip,
8584                                                self.instance.disks)\
8585                                               [self.instance.primary_node]
8586
8587     msg = result.fail_msg
8588     if msg:
8589       # detaches didn't succeed (unlikely)
8590       self.cfg.ReleaseDRBDMinors(self.instance.name)
8591       raise errors.OpExecError("Can't detach the disks from the network on"
8592                                " old node: %s" % (msg,))
8593
8594     # if we managed to detach at least one, we update all the disks of
8595     # the instance to point to the new secondary
8596     self.lu.LogInfo("Updating instance configuration")
8597     for dev, _, new_logical_id in iv_names.itervalues():
8598       dev.logical_id = new_logical_id
8599       self.cfg.SetDiskID(dev, self.instance.primary_node)
8600
8601     self.cfg.Update(self.instance, feedback_fn)
8602
8603     # and now perform the drbd attach
8604     self.lu.LogInfo("Attaching primary drbds to new secondary"
8605                     " (standalone => connected)")
8606     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8607                                             self.new_node],
8608                                            self.node_secondary_ip,
8609                                            self.instance.disks,
8610                                            self.instance.name,
8611                                            False)
8612     for to_node, to_result in result.items():
8613       msg = to_result.fail_msg
8614       if msg:
8615         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8616                            to_node, msg,
8617                            hint=("please do a gnt-instance info to see the"
8618                                  " status of disks"))
8619     cstep = 5
8620     if self.early_release:
8621       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8622       cstep += 1
8623       self._RemoveOldStorage(self.target_node, iv_names)
8624       # WARNING: we release all node locks here, do not do other RPCs
8625       # than WaitForSync to the primary node
8626       self._ReleaseNodeLock([self.instance.primary_node,
8627                              self.target_node,
8628                              self.new_node])
8629
8630     # Wait for sync
8631     # This can fail as the old devices are degraded and _WaitForSync
8632     # does a combined result over all disks, so we don't check its return value
8633     self.lu.LogStep(cstep, steps_total, "Sync devices")
8634     cstep += 1
8635     _WaitForSync(self.lu, self.instance)
8636
8637     # Check all devices manually
8638     self._CheckDevices(self.instance.primary_node, iv_names)
8639
8640     # Step: remove old storage
8641     if not self.early_release:
8642       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8643       self._RemoveOldStorage(self.target_node, iv_names)
8644
8645
8646 class LURepairNodeStorage(NoHooksLU):
8647   """Repairs the volume group on a node.
8648
8649   """
8650   _OP_PARAMS = [
8651     _PNodeName,
8652     ("storage_type", ht.NoDefault, _CheckStorageType),
8653     ("name", ht.NoDefault, ht.TNonEmptyString),
8654     ("ignore_consistency", False, ht.TBool),
8655     ]
8656   REQ_BGL = False
8657
8658   def CheckArguments(self):
8659     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8660
8661     storage_type = self.op.storage_type
8662
8663     if (constants.SO_FIX_CONSISTENCY not in
8664         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8665       raise errors.OpPrereqError("Storage units of type '%s' can not be"
8666                                  " repaired" % storage_type,
8667                                  errors.ECODE_INVAL)
8668
8669   def ExpandNames(self):
8670     self.needed_locks = {
8671       locking.LEVEL_NODE: [self.op.node_name],
8672       }
8673
8674   def _CheckFaultyDisks(self, instance, node_name):
8675     """Ensure faulty disks abort the opcode or at least warn."""
8676     try:
8677       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8678                                   node_name, True):
8679         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8680                                    " node '%s'" % (instance.name, node_name),
8681                                    errors.ECODE_STATE)
8682     except errors.OpPrereqError, err:
8683       if self.op.ignore_consistency:
8684         self.proc.LogWarning(str(err.args[0]))
8685       else:
8686         raise
8687
8688   def CheckPrereq(self):
8689     """Check prerequisites.
8690
8691     """
8692     # Check whether any instance on this node has faulty disks
8693     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8694       if not inst.admin_up:
8695         continue
8696       check_nodes = set(inst.all_nodes)
8697       check_nodes.discard(self.op.node_name)
8698       for inst_node_name in check_nodes:
8699         self._CheckFaultyDisks(inst, inst_node_name)
8700
8701   def Exec(self, feedback_fn):
8702     feedback_fn("Repairing storage unit '%s' on %s ..." %
8703                 (self.op.name, self.op.node_name))
8704
8705     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8706     result = self.rpc.call_storage_execute(self.op.node_name,
8707                                            self.op.storage_type, st_args,
8708                                            self.op.name,
8709                                            constants.SO_FIX_CONSISTENCY)
8710     result.Raise("Failed to repair storage unit '%s' on %s" %
8711                  (self.op.name, self.op.node_name))
8712
8713
8714 class LUNodeEvacuationStrategy(NoHooksLU):
8715   """Computes the node evacuation strategy.
8716
8717   """
8718   _OP_PARAMS = [
8719     ("nodes", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
8720     ("remote_node", None, ht.TMaybeString),
8721     ("iallocator", None, ht.TMaybeString),
8722     ]
8723   REQ_BGL = False
8724
8725   def CheckArguments(self):
8726     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8727
8728   def ExpandNames(self):
8729     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8730     self.needed_locks = locks = {}
8731     if self.op.remote_node is None:
8732       locks[locking.LEVEL_NODE] = locking.ALL_SET
8733     else:
8734       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8735       locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8736
8737   def Exec(self, feedback_fn):
8738     if self.op.remote_node is not None:
8739       instances = []
8740       for node in self.op.nodes:
8741         instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8742       result = []
8743       for i in instances:
8744         if i.primary_node == self.op.remote_node:
8745           raise errors.OpPrereqError("Node %s is the primary node of"
8746                                      " instance %s, cannot use it as"
8747                                      " secondary" %
8748                                      (self.op.remote_node, i.name),
8749                                      errors.ECODE_INVAL)
8750         result.append([i.name, self.op.remote_node])
8751     else:
8752       ial = IAllocator(self.cfg, self.rpc,
8753                        mode=constants.IALLOCATOR_MODE_MEVAC,
8754                        evac_nodes=self.op.nodes)
8755       ial.Run(self.op.iallocator, validate=True)
8756       if not ial.success:
8757         raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8758                                  errors.ECODE_NORES)
8759       result = ial.result
8760     return result
8761
8762
8763 class LUGrowDisk(LogicalUnit):
8764   """Grow a disk of an instance.
8765
8766   """
8767   HPATH = "disk-grow"
8768   HTYPE = constants.HTYPE_INSTANCE
8769   _OP_PARAMS = [
8770     _PInstanceName,
8771     ("disk", ht.NoDefault, ht.TInt),
8772     ("amount", ht.NoDefault, ht.TInt),
8773     ("wait_for_sync", True, ht.TBool),
8774     ]
8775   REQ_BGL = False
8776
8777   def ExpandNames(self):
8778     self._ExpandAndLockInstance()
8779     self.needed_locks[locking.LEVEL_NODE] = []
8780     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8781
8782   def DeclareLocks(self, level):
8783     if level == locking.LEVEL_NODE:
8784       self._LockInstancesNodes()
8785
8786   def BuildHooksEnv(self):
8787     """Build hooks env.
8788
8789     This runs on the master, the primary and all the secondaries.
8790
8791     """
8792     env = {
8793       "DISK": self.op.disk,
8794       "AMOUNT": self.op.amount,
8795       }
8796     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8797     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8798     return env, nl, nl
8799
8800   def CheckPrereq(self):
8801     """Check prerequisites.
8802
8803     This checks that the instance is in the cluster.
8804
8805     """
8806     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8807     assert instance is not None, \
8808       "Cannot retrieve locked instance %s" % self.op.instance_name
8809     nodenames = list(instance.all_nodes)
8810     for node in nodenames:
8811       _CheckNodeOnline(self, node)
8812
8813     self.instance = instance
8814
8815     if instance.disk_template not in constants.DTS_GROWABLE:
8816       raise errors.OpPrereqError("Instance's disk layout does not support"
8817                                  " growing.", errors.ECODE_INVAL)
8818
8819     self.disk = instance.FindDisk(self.op.disk)
8820
8821     if instance.disk_template != constants.DT_FILE:
8822       # TODO: check the free disk space for file, when that feature
8823       # will be supported
8824       _CheckNodesFreeDiskPerVG(self, nodenames,
8825                                {self.disk.physical_id[0]: self.op.amount})
8826
8827   def Exec(self, feedback_fn):
8828     """Execute disk grow.
8829
8830     """
8831     instance = self.instance
8832     disk = self.disk
8833
8834     disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8835     if not disks_ok:
8836       raise errors.OpExecError("Cannot activate block device to grow")
8837
8838     for node in instance.all_nodes:
8839       self.cfg.SetDiskID(disk, node)
8840       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8841       result.Raise("Grow request failed to node %s" % node)
8842
8843       # TODO: Rewrite code to work properly
8844       # DRBD goes into sync mode for a short amount of time after executing the
8845       # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8846       # calling "resize" in sync mode fails. Sleeping for a short amount of
8847       # time is a work-around.
8848       time.sleep(5)
8849
8850     disk.RecordGrow(self.op.amount)
8851     self.cfg.Update(instance, feedback_fn)
8852     if self.op.wait_for_sync:
8853       disk_abort = not _WaitForSync(self, instance, disks=[disk])
8854       if disk_abort:
8855         self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8856                              " status.\nPlease check the instance.")
8857       if not instance.admin_up:
8858         _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8859     elif not instance.admin_up:
8860       self.proc.LogWarning("Not shutting down the disk even if the instance is"
8861                            " not supposed to be running because no wait for"
8862                            " sync mode was requested.")
8863
8864
8865 class LUQueryInstanceData(NoHooksLU):
8866   """Query runtime instance data.
8867
8868   """
8869   _OP_PARAMS = [
8870     ("instances", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
8871     ("static", False, ht.TBool),
8872     ]
8873   REQ_BGL = False
8874
8875   def ExpandNames(self):
8876     self.needed_locks = {}
8877     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8878
8879     if self.op.instances:
8880       self.wanted_names = []
8881       for name in self.op.instances:
8882         full_name = _ExpandInstanceName(self.cfg, name)
8883         self.wanted_names.append(full_name)
8884       self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8885     else:
8886       self.wanted_names = None
8887       self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8888
8889     self.needed_locks[locking.LEVEL_NODE] = []
8890     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8891
8892   def DeclareLocks(self, level):
8893     if level == locking.LEVEL_NODE:
8894       self._LockInstancesNodes()
8895
8896   def CheckPrereq(self):
8897     """Check prerequisites.
8898
8899     This only checks the optional instance list against the existing names.
8900
8901     """
8902     if self.wanted_names is None:
8903       self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8904
8905     self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8906                              in self.wanted_names]
8907
8908   def _ComputeBlockdevStatus(self, node, instance_name, dev):
8909     """Returns the status of a block device
8910
8911     """
8912     if self.op.static or not node:
8913       return None
8914
8915     self.cfg.SetDiskID(dev, node)
8916
8917     result = self.rpc.call_blockdev_find(node, dev)
8918     if result.offline:
8919       return None
8920
8921     result.Raise("Can't compute disk status for %s" % instance_name)
8922
8923     status = result.payload
8924     if status is None:
8925       return None
8926
8927     return (status.dev_path, status.major, status.minor,
8928             status.sync_percent, status.estimated_time,
8929             status.is_degraded, status.ldisk_status)
8930
8931   def _ComputeDiskStatus(self, instance, snode, dev):
8932     """Compute block device status.
8933
8934     """
8935     if dev.dev_type in constants.LDS_DRBD:
8936       # we change the snode then (otherwise we use the one passed in)
8937       if dev.logical_id[0] == instance.primary_node:
8938         snode = dev.logical_id[1]
8939       else:
8940         snode = dev.logical_id[0]
8941
8942     dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8943                                               instance.name, dev)
8944     dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8945
8946     if dev.children:
8947       dev_children = [self._ComputeDiskStatus(instance, snode, child)
8948                       for child in dev.children]
8949     else:
8950       dev_children = []
8951
8952     data = {
8953       "iv_name": dev.iv_name,
8954       "dev_type": dev.dev_type,
8955       "logical_id": dev.logical_id,
8956       "physical_id": dev.physical_id,
8957       "pstatus": dev_pstatus,
8958       "sstatus": dev_sstatus,
8959       "children": dev_children,
8960       "mode": dev.mode,
8961       "size": dev.size,
8962       }
8963
8964     return data
8965
8966   def Exec(self, feedback_fn):
8967     """Gather and return data"""
8968     result = {}
8969
8970     cluster = self.cfg.GetClusterInfo()
8971
8972     for instance in self.wanted_instances:
8973       if not self.op.static:
8974         remote_info = self.rpc.call_instance_info(instance.primary_node,
8975                                                   instance.name,
8976                                                   instance.hypervisor)
8977         remote_info.Raise("Error checking node %s" % instance.primary_node)
8978         remote_info = remote_info.payload
8979         if remote_info and "state" in remote_info:
8980           remote_state = "up"
8981         else:
8982           remote_state = "down"
8983       else:
8984         remote_state = None
8985       if instance.admin_up:
8986         config_state = "up"
8987       else:
8988         config_state = "down"
8989
8990       disks = [self._ComputeDiskStatus(instance, None, device)
8991                for device in instance.disks]
8992
8993       idict = {
8994         "name": instance.name,
8995         "config_state": config_state,
8996         "run_state": remote_state,
8997         "pnode": instance.primary_node,
8998         "snodes": instance.secondary_nodes,
8999         "os": instance.os,
9000         # this happens to be the same format used for hooks
9001         "nics": _NICListToTuple(self, instance.nics),
9002         "disk_template": instance.disk_template,
9003         "disks": disks,
9004         "hypervisor": instance.hypervisor,
9005         "network_port": instance.network_port,
9006         "hv_instance": instance.hvparams,
9007         "hv_actual": cluster.FillHV(instance, skip_globals=True),
9008         "be_instance": instance.beparams,
9009         "be_actual": cluster.FillBE(instance),
9010         "os_instance": instance.osparams,
9011         "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
9012         "serial_no": instance.serial_no,
9013         "mtime": instance.mtime,
9014         "ctime": instance.ctime,
9015         "uuid": instance.uuid,
9016         }
9017
9018       result[instance.name] = idict
9019
9020     return result
9021
9022
9023 class LUSetInstanceParams(LogicalUnit):
9024   """Modifies an instances's parameters.
9025
9026   """
9027   HPATH = "instance-modify"
9028   HTYPE = constants.HTYPE_INSTANCE
9029   _OP_PARAMS = [
9030     _PInstanceName,
9031     ("nics", ht.EmptyList, ht.TList),
9032     ("disks", ht.EmptyList, ht.TList),
9033     ("beparams", ht.EmptyDict, ht.TDict),
9034     ("hvparams", ht.EmptyDict, ht.TDict),
9035     ("disk_template", None, ht.TMaybeString),
9036     ("remote_node", None, ht.TMaybeString),
9037     ("os_name", None, ht.TMaybeString),
9038     ("force_variant", False, ht.TBool),
9039     ("osparams", None, ht.TOr(ht.TDict, ht.TNone)),
9040     _PForce,
9041     ]
9042   REQ_BGL = False
9043
9044   def CheckArguments(self):
9045     if not (self.op.nics or self.op.disks or self.op.disk_template or
9046             self.op.hvparams or self.op.beparams or self.op.os_name):
9047       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
9048
9049     if self.op.hvparams:
9050       _CheckGlobalHvParams(self.op.hvparams)
9051
9052     # Disk validation
9053     disk_addremove = 0
9054     for disk_op, disk_dict in self.op.disks:
9055       utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
9056       if disk_op == constants.DDM_REMOVE:
9057         disk_addremove += 1
9058         continue
9059       elif disk_op == constants.DDM_ADD:
9060         disk_addremove += 1
9061       else:
9062         if not isinstance(disk_op, int):
9063           raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
9064         if not isinstance(disk_dict, dict):
9065           msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
9066           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9067
9068       if disk_op == constants.DDM_ADD:
9069         mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
9070         if mode not in constants.DISK_ACCESS_SET:
9071           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9072                                      errors.ECODE_INVAL)
9073         size = disk_dict.get('size', None)
9074         if size is None:
9075           raise errors.OpPrereqError("Required disk parameter size missing",
9076                                      errors.ECODE_INVAL)
9077         try:
9078           size = int(size)
9079         except (TypeError, ValueError), err:
9080           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
9081                                      str(err), errors.ECODE_INVAL)
9082         disk_dict['size'] = size
9083       else:
9084         # modification of disk
9085         if 'size' in disk_dict:
9086           raise errors.OpPrereqError("Disk size change not possible, use"
9087                                      " grow-disk", errors.ECODE_INVAL)
9088
9089     if disk_addremove > 1:
9090       raise errors.OpPrereqError("Only one disk add or remove operation"
9091                                  " supported at a time", errors.ECODE_INVAL)
9092
9093     if self.op.disks and self.op.disk_template is not None:
9094       raise errors.OpPrereqError("Disk template conversion and other disk"
9095                                  " changes not supported at the same time",
9096                                  errors.ECODE_INVAL)
9097
9098     if self.op.disk_template:
9099       _CheckDiskTemplate(self.op.disk_template)
9100       if (self.op.disk_template in constants.DTS_NET_MIRROR and
9101           self.op.remote_node is None):
9102         raise errors.OpPrereqError("Changing the disk template to a mirrored"
9103                                    " one requires specifying a secondary node",
9104                                    errors.ECODE_INVAL)
9105
9106     # NIC validation
9107     nic_addremove = 0
9108     for nic_op, nic_dict in self.op.nics:
9109       utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9110       if nic_op == constants.DDM_REMOVE:
9111         nic_addremove += 1
9112         continue
9113       elif nic_op == constants.DDM_ADD:
9114         nic_addremove += 1
9115       else:
9116         if not isinstance(nic_op, int):
9117           raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9118         if not isinstance(nic_dict, dict):
9119           msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9120           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9121
9122       # nic_dict should be a dict
9123       nic_ip = nic_dict.get('ip', None)
9124       if nic_ip is not None:
9125         if nic_ip.lower() == constants.VALUE_NONE:
9126           nic_dict['ip'] = None
9127         else:
9128           if not netutils.IPAddress.IsValid(nic_ip):
9129             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9130                                        errors.ECODE_INVAL)
9131
9132       nic_bridge = nic_dict.get('bridge', None)
9133       nic_link = nic_dict.get('link', None)
9134       if nic_bridge and nic_link:
9135         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9136                                    " at the same time", errors.ECODE_INVAL)
9137       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9138         nic_dict['bridge'] = None
9139       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9140         nic_dict['link'] = None
9141
9142       if nic_op == constants.DDM_ADD:
9143         nic_mac = nic_dict.get('mac', None)
9144         if nic_mac is None:
9145           nic_dict['mac'] = constants.VALUE_AUTO
9146
9147       if 'mac' in nic_dict:
9148         nic_mac = nic_dict['mac']
9149         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9150           nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9151
9152         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9153           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9154                                      " modifying an existing nic",
9155                                      errors.ECODE_INVAL)
9156
9157     if nic_addremove > 1:
9158       raise errors.OpPrereqError("Only one NIC add or remove operation"
9159                                  " supported at a time", errors.ECODE_INVAL)
9160
9161   def ExpandNames(self):
9162     self._ExpandAndLockInstance()
9163     self.needed_locks[locking.LEVEL_NODE] = []
9164     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9165
9166   def DeclareLocks(self, level):
9167     if level == locking.LEVEL_NODE:
9168       self._LockInstancesNodes()
9169       if self.op.disk_template and self.op.remote_node:
9170         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9171         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9172
9173   def BuildHooksEnv(self):
9174     """Build hooks env.
9175
9176     This runs on the master, primary and secondaries.
9177
9178     """
9179     args = dict()
9180     if constants.BE_MEMORY in self.be_new:
9181       args['memory'] = self.be_new[constants.BE_MEMORY]
9182     if constants.BE_VCPUS in self.be_new:
9183       args['vcpus'] = self.be_new[constants.BE_VCPUS]
9184     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9185     # information at all.
9186     if self.op.nics:
9187       args['nics'] = []
9188       nic_override = dict(self.op.nics)
9189       for idx, nic in enumerate(self.instance.nics):
9190         if idx in nic_override:
9191           this_nic_override = nic_override[idx]
9192         else:
9193           this_nic_override = {}
9194         if 'ip' in this_nic_override:
9195           ip = this_nic_override['ip']
9196         else:
9197           ip = nic.ip
9198         if 'mac' in this_nic_override:
9199           mac = this_nic_override['mac']
9200         else:
9201           mac = nic.mac
9202         if idx in self.nic_pnew:
9203           nicparams = self.nic_pnew[idx]
9204         else:
9205           nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9206         mode = nicparams[constants.NIC_MODE]
9207         link = nicparams[constants.NIC_LINK]
9208         args['nics'].append((ip, mac, mode, link))
9209       if constants.DDM_ADD in nic_override:
9210         ip = nic_override[constants.DDM_ADD].get('ip', None)
9211         mac = nic_override[constants.DDM_ADD]['mac']
9212         nicparams = self.nic_pnew[constants.DDM_ADD]
9213         mode = nicparams[constants.NIC_MODE]
9214         link = nicparams[constants.NIC_LINK]
9215         args['nics'].append((ip, mac, mode, link))
9216       elif constants.DDM_REMOVE in nic_override:
9217         del args['nics'][-1]
9218
9219     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9220     if self.op.disk_template:
9221       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9222     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9223     return env, nl, nl
9224
9225   def CheckPrereq(self):
9226     """Check prerequisites.
9227
9228     This only checks the instance list against the existing names.
9229
9230     """
9231     # checking the new params on the primary/secondary nodes
9232
9233     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9234     cluster = self.cluster = self.cfg.GetClusterInfo()
9235     assert self.instance is not None, \
9236       "Cannot retrieve locked instance %s" % self.op.instance_name
9237     pnode = instance.primary_node
9238     nodelist = list(instance.all_nodes)
9239
9240     # OS change
9241     if self.op.os_name and not self.op.force:
9242       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9243                       self.op.force_variant)
9244       instance_os = self.op.os_name
9245     else:
9246       instance_os = instance.os
9247
9248     if self.op.disk_template:
9249       if instance.disk_template == self.op.disk_template:
9250         raise errors.OpPrereqError("Instance already has disk template %s" %
9251                                    instance.disk_template, errors.ECODE_INVAL)
9252
9253       if (instance.disk_template,
9254           self.op.disk_template) not in self._DISK_CONVERSIONS:
9255         raise errors.OpPrereqError("Unsupported disk template conversion from"
9256                                    " %s to %s" % (instance.disk_template,
9257                                                   self.op.disk_template),
9258                                    errors.ECODE_INVAL)
9259       _CheckInstanceDown(self, instance, "cannot change disk template")
9260       if self.op.disk_template in constants.DTS_NET_MIRROR:
9261         if self.op.remote_node == pnode:
9262           raise errors.OpPrereqError("Given new secondary node %s is the same"
9263                                      " as the primary node of the instance" %
9264                                      self.op.remote_node, errors.ECODE_STATE)
9265         _CheckNodeOnline(self, self.op.remote_node)
9266         _CheckNodeNotDrained(self, self.op.remote_node)
9267         disks = [{"size": d.size, "vg": d.vg} for d in instance.disks]
9268         required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9269         _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9270
9271     # hvparams processing
9272     if self.op.hvparams:
9273       hv_type = instance.hypervisor
9274       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9275       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9276       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9277
9278       # local check
9279       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9280       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9281       self.hv_new = hv_new # the new actual values
9282       self.hv_inst = i_hvdict # the new dict (without defaults)
9283     else:
9284       self.hv_new = self.hv_inst = {}
9285
9286     # beparams processing
9287     if self.op.beparams:
9288       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9289                                    use_none=True)
9290       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9291       be_new = cluster.SimpleFillBE(i_bedict)
9292       self.be_new = be_new # the new actual values
9293       self.be_inst = i_bedict # the new dict (without defaults)
9294     else:
9295       self.be_new = self.be_inst = {}
9296
9297     # osparams processing
9298     if self.op.osparams:
9299       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9300       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9301       self.os_inst = i_osdict # the new dict (without defaults)
9302     else:
9303       self.os_inst = {}
9304
9305     self.warn = []
9306
9307     if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9308       mem_check_list = [pnode]
9309       if be_new[constants.BE_AUTO_BALANCE]:
9310         # either we changed auto_balance to yes or it was from before
9311         mem_check_list.extend(instance.secondary_nodes)
9312       instance_info = self.rpc.call_instance_info(pnode, instance.name,
9313                                                   instance.hypervisor)
9314       nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
9315                                          instance.hypervisor)
9316       pninfo = nodeinfo[pnode]
9317       msg = pninfo.fail_msg
9318       if msg:
9319         # Assume the primary node is unreachable and go ahead
9320         self.warn.append("Can't get info from primary node %s: %s" %
9321                          (pnode,  msg))
9322       elif not isinstance(pninfo.payload.get('memory_free', None), int):
9323         self.warn.append("Node data from primary node %s doesn't contain"
9324                          " free memory information" % pnode)
9325       elif instance_info.fail_msg:
9326         self.warn.append("Can't get instance runtime information: %s" %
9327                         instance_info.fail_msg)
9328       else:
9329         if instance_info.payload:
9330           current_mem = int(instance_info.payload['memory'])
9331         else:
9332           # Assume instance not running
9333           # (there is a slight race condition here, but it's not very probable,
9334           # and we have no other way to check)
9335           current_mem = 0
9336         miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9337                     pninfo.payload['memory_free'])
9338         if miss_mem > 0:
9339           raise errors.OpPrereqError("This change will prevent the instance"
9340                                      " from starting, due to %d MB of memory"
9341                                      " missing on its primary node" % miss_mem,
9342                                      errors.ECODE_NORES)
9343
9344       if be_new[constants.BE_AUTO_BALANCE]:
9345         for node, nres in nodeinfo.items():
9346           if node not in instance.secondary_nodes:
9347             continue
9348           msg = nres.fail_msg
9349           if msg:
9350             self.warn.append("Can't get info from secondary node %s: %s" %
9351                              (node, msg))
9352           elif not isinstance(nres.payload.get('memory_free', None), int):
9353             self.warn.append("Secondary node %s didn't return free"
9354                              " memory information" % node)
9355           elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9356             self.warn.append("Not enough memory to failover instance to"
9357                              " secondary node %s" % node)
9358
9359     # NIC processing
9360     self.nic_pnew = {}
9361     self.nic_pinst = {}
9362     for nic_op, nic_dict in self.op.nics:
9363       if nic_op == constants.DDM_REMOVE:
9364         if not instance.nics:
9365           raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9366                                      errors.ECODE_INVAL)
9367         continue
9368       if nic_op != constants.DDM_ADD:
9369         # an existing nic
9370         if not instance.nics:
9371           raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9372                                      " no NICs" % nic_op,
9373                                      errors.ECODE_INVAL)
9374         if nic_op < 0 or nic_op >= len(instance.nics):
9375           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9376                                      " are 0 to %d" %
9377                                      (nic_op, len(instance.nics) - 1),
9378                                      errors.ECODE_INVAL)
9379         old_nic_params = instance.nics[nic_op].nicparams
9380         old_nic_ip = instance.nics[nic_op].ip
9381       else:
9382         old_nic_params = {}
9383         old_nic_ip = None
9384
9385       update_params_dict = dict([(key, nic_dict[key])
9386                                  for key in constants.NICS_PARAMETERS
9387                                  if key in nic_dict])
9388
9389       if 'bridge' in nic_dict:
9390         update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9391
9392       new_nic_params = _GetUpdatedParams(old_nic_params,
9393                                          update_params_dict)
9394       utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9395       new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9396       objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9397       self.nic_pinst[nic_op] = new_nic_params
9398       self.nic_pnew[nic_op] = new_filled_nic_params
9399       new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9400
9401       if new_nic_mode == constants.NIC_MODE_BRIDGED:
9402         nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9403         msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9404         if msg:
9405           msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9406           if self.op.force:
9407             self.warn.append(msg)
9408           else:
9409             raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9410       if new_nic_mode == constants.NIC_MODE_ROUTED:
9411         if 'ip' in nic_dict:
9412           nic_ip = nic_dict['ip']
9413         else:
9414           nic_ip = old_nic_ip
9415         if nic_ip is None:
9416           raise errors.OpPrereqError('Cannot set the nic ip to None'
9417                                      ' on a routed nic', errors.ECODE_INVAL)
9418       if 'mac' in nic_dict:
9419         nic_mac = nic_dict['mac']
9420         if nic_mac is None:
9421           raise errors.OpPrereqError('Cannot set the nic mac to None',
9422                                      errors.ECODE_INVAL)
9423         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9424           # otherwise generate the mac
9425           nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9426         else:
9427           # or validate/reserve the current one
9428           try:
9429             self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9430           except errors.ReservationError:
9431             raise errors.OpPrereqError("MAC address %s already in use"
9432                                        " in cluster" % nic_mac,
9433                                        errors.ECODE_NOTUNIQUE)
9434
9435     # DISK processing
9436     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9437       raise errors.OpPrereqError("Disk operations not supported for"
9438                                  " diskless instances",
9439                                  errors.ECODE_INVAL)
9440     for disk_op, _ in self.op.disks:
9441       if disk_op == constants.DDM_REMOVE:
9442         if len(instance.disks) == 1:
9443           raise errors.OpPrereqError("Cannot remove the last disk of"
9444                                      " an instance", errors.ECODE_INVAL)
9445         _CheckInstanceDown(self, instance, "cannot remove disks")
9446
9447       if (disk_op == constants.DDM_ADD and
9448           len(instance.nics) >= constants.MAX_DISKS):
9449         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9450                                    " add more" % constants.MAX_DISKS,
9451                                    errors.ECODE_STATE)
9452       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9453         # an existing disk
9454         if disk_op < 0 or disk_op >= len(instance.disks):
9455           raise errors.OpPrereqError("Invalid disk index %s, valid values"
9456                                      " are 0 to %d" %
9457                                      (disk_op, len(instance.disks)),
9458                                      errors.ECODE_INVAL)
9459
9460     return
9461
9462   def _ConvertPlainToDrbd(self, feedback_fn):
9463     """Converts an instance from plain to drbd.
9464
9465     """
9466     feedback_fn("Converting template to drbd")
9467     instance = self.instance
9468     pnode = instance.primary_node
9469     snode = self.op.remote_node
9470
9471     # create a fake disk info for _GenerateDiskTemplate
9472     disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9473     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9474                                       instance.name, pnode, [snode],
9475                                       disk_info, None, None, 0, feedback_fn)
9476     info = _GetInstanceInfoText(instance)
9477     feedback_fn("Creating aditional volumes...")
9478     # first, create the missing data and meta devices
9479     for disk in new_disks:
9480       # unfortunately this is... not too nice
9481       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9482                             info, True)
9483       for child in disk.children:
9484         _CreateSingleBlockDev(self, snode, instance, child, info, True)
9485     # at this stage, all new LVs have been created, we can rename the
9486     # old ones
9487     feedback_fn("Renaming original volumes...")
9488     rename_list = [(o, n.children[0].logical_id)
9489                    for (o, n) in zip(instance.disks, new_disks)]
9490     result = self.rpc.call_blockdev_rename(pnode, rename_list)
9491     result.Raise("Failed to rename original LVs")
9492
9493     feedback_fn("Initializing DRBD devices...")
9494     # all child devices are in place, we can now create the DRBD devices
9495     for disk in new_disks:
9496       for node in [pnode, snode]:
9497         f_create = node == pnode
9498         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9499
9500     # at this point, the instance has been modified
9501     instance.disk_template = constants.DT_DRBD8
9502     instance.disks = new_disks
9503     self.cfg.Update(instance, feedback_fn)
9504
9505     # disks are created, waiting for sync
9506     disk_abort = not _WaitForSync(self, instance)
9507     if disk_abort:
9508       raise errors.OpExecError("There are some degraded disks for"
9509                                " this instance, please cleanup manually")
9510
9511   def _ConvertDrbdToPlain(self, feedback_fn):
9512     """Converts an instance from drbd to plain.
9513
9514     """
9515     instance = self.instance
9516     assert len(instance.secondary_nodes) == 1
9517     pnode = instance.primary_node
9518     snode = instance.secondary_nodes[0]
9519     feedback_fn("Converting template to plain")
9520
9521     old_disks = instance.disks
9522     new_disks = [d.children[0] for d in old_disks]
9523
9524     # copy over size and mode
9525     for parent, child in zip(old_disks, new_disks):
9526       child.size = parent.size
9527       child.mode = parent.mode
9528
9529     # update instance structure
9530     instance.disks = new_disks
9531     instance.disk_template = constants.DT_PLAIN
9532     self.cfg.Update(instance, feedback_fn)
9533
9534     feedback_fn("Removing volumes on the secondary node...")
9535     for disk in old_disks:
9536       self.cfg.SetDiskID(disk, snode)
9537       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9538       if msg:
9539         self.LogWarning("Could not remove block device %s on node %s,"
9540                         " continuing anyway: %s", disk.iv_name, snode, msg)
9541
9542     feedback_fn("Removing unneeded volumes on the primary node...")
9543     for idx, disk in enumerate(old_disks):
9544       meta = disk.children[1]
9545       self.cfg.SetDiskID(meta, pnode)
9546       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9547       if msg:
9548         self.LogWarning("Could not remove metadata for disk %d on node %s,"
9549                         " continuing anyway: %s", idx, pnode, msg)
9550
9551
9552   def Exec(self, feedback_fn):
9553     """Modifies an instance.
9554
9555     All parameters take effect only at the next restart of the instance.
9556
9557     """
9558     # Process here the warnings from CheckPrereq, as we don't have a
9559     # feedback_fn there.
9560     for warn in self.warn:
9561       feedback_fn("WARNING: %s" % warn)
9562
9563     result = []
9564     instance = self.instance
9565     # disk changes
9566     for disk_op, disk_dict in self.op.disks:
9567       if disk_op == constants.DDM_REMOVE:
9568         # remove the last disk
9569         device = instance.disks.pop()
9570         device_idx = len(instance.disks)
9571         for node, disk in device.ComputeNodeTree(instance.primary_node):
9572           self.cfg.SetDiskID(disk, node)
9573           msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9574           if msg:
9575             self.LogWarning("Could not remove disk/%d on node %s: %s,"
9576                             " continuing anyway", device_idx, node, msg)
9577         result.append(("disk/%d" % device_idx, "remove"))
9578       elif disk_op == constants.DDM_ADD:
9579         # add a new disk
9580         if instance.disk_template == constants.DT_FILE:
9581           file_driver, file_path = instance.disks[0].logical_id
9582           file_path = os.path.dirname(file_path)
9583         else:
9584           file_driver = file_path = None
9585         disk_idx_base = len(instance.disks)
9586         new_disk = _GenerateDiskTemplate(self,
9587                                          instance.disk_template,
9588                                          instance.name, instance.primary_node,
9589                                          instance.secondary_nodes,
9590                                          [disk_dict],
9591                                          file_path,
9592                                          file_driver,
9593                                          disk_idx_base, feedback_fn)[0]
9594         instance.disks.append(new_disk)
9595         info = _GetInstanceInfoText(instance)
9596
9597         logging.info("Creating volume %s for instance %s",
9598                      new_disk.iv_name, instance.name)
9599         # Note: this needs to be kept in sync with _CreateDisks
9600         #HARDCODE
9601         for node in instance.all_nodes:
9602           f_create = node == instance.primary_node
9603           try:
9604             _CreateBlockDev(self, node, instance, new_disk,
9605                             f_create, info, f_create)
9606           except errors.OpExecError, err:
9607             self.LogWarning("Failed to create volume %s (%s) on"
9608                             " node %s: %s",
9609                             new_disk.iv_name, new_disk, node, err)
9610         result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9611                        (new_disk.size, new_disk.mode)))
9612       else:
9613         # change a given disk
9614         instance.disks[disk_op].mode = disk_dict['mode']
9615         result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9616
9617     if self.op.disk_template:
9618       r_shut = _ShutdownInstanceDisks(self, instance)
9619       if not r_shut:
9620         raise errors.OpExecError("Cannot shutdow instance disks, unable to"
9621                                  " proceed with disk template conversion")
9622       mode = (instance.disk_template, self.op.disk_template)
9623       try:
9624         self._DISK_CONVERSIONS[mode](self, feedback_fn)
9625       except:
9626         self.cfg.ReleaseDRBDMinors(instance.name)
9627         raise
9628       result.append(("disk_template", self.op.disk_template))
9629
9630     # NIC changes
9631     for nic_op, nic_dict in self.op.nics:
9632       if nic_op == constants.DDM_REMOVE:
9633         # remove the last nic
9634         del instance.nics[-1]
9635         result.append(("nic.%d" % len(instance.nics), "remove"))
9636       elif nic_op == constants.DDM_ADD:
9637         # mac and bridge should be set, by now
9638         mac = nic_dict['mac']
9639         ip = nic_dict.get('ip', None)
9640         nicparams = self.nic_pinst[constants.DDM_ADD]
9641         new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9642         instance.nics.append(new_nic)
9643         result.append(("nic.%d" % (len(instance.nics) - 1),
9644                        "add:mac=%s,ip=%s,mode=%s,link=%s" %
9645                        (new_nic.mac, new_nic.ip,
9646                         self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9647                         self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9648                        )))
9649       else:
9650         for key in 'mac', 'ip':
9651           if key in nic_dict:
9652             setattr(instance.nics[nic_op], key, nic_dict[key])
9653         if nic_op in self.nic_pinst:
9654           instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9655         for key, val in nic_dict.iteritems():
9656           result.append(("nic.%s/%d" % (key, nic_op), val))
9657
9658     # hvparams changes
9659     if self.op.hvparams:
9660       instance.hvparams = self.hv_inst
9661       for key, val in self.op.hvparams.iteritems():
9662         result.append(("hv/%s" % key, val))
9663
9664     # beparams changes
9665     if self.op.beparams:
9666       instance.beparams = self.be_inst
9667       for key, val in self.op.beparams.iteritems():
9668         result.append(("be/%s" % key, val))
9669
9670     # OS change
9671     if self.op.os_name:
9672       instance.os = self.op.os_name
9673
9674     # osparams changes
9675     if self.op.osparams:
9676       instance.osparams = self.os_inst
9677       for key, val in self.op.osparams.iteritems():
9678         result.append(("os/%s" % key, val))
9679
9680     self.cfg.Update(instance, feedback_fn)
9681
9682     return result
9683
9684   _DISK_CONVERSIONS = {
9685     (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9686     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9687     }
9688
9689
9690 class LUQueryExports(NoHooksLU):
9691   """Query the exports list
9692
9693   """
9694   _OP_PARAMS = [
9695     ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
9696     ("use_locking", False, ht.TBool),
9697     ]
9698   REQ_BGL = False
9699
9700   def ExpandNames(self):
9701     self.needed_locks = {}
9702     self.share_locks[locking.LEVEL_NODE] = 1
9703     if not self.op.nodes:
9704       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9705     else:
9706       self.needed_locks[locking.LEVEL_NODE] = \
9707         _GetWantedNodes(self, self.op.nodes)
9708
9709   def Exec(self, feedback_fn):
9710     """Compute the list of all the exported system images.
9711
9712     @rtype: dict
9713     @return: a dictionary with the structure node->(export-list)
9714         where export-list is a list of the instances exported on
9715         that node.
9716
9717     """
9718     self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9719     rpcresult = self.rpc.call_export_list(self.nodes)
9720     result = {}
9721     for node in rpcresult:
9722       if rpcresult[node].fail_msg:
9723         result[node] = False
9724       else:
9725         result[node] = rpcresult[node].payload
9726
9727     return result
9728
9729
9730 class LUPrepareExport(NoHooksLU):
9731   """Prepares an instance for an export and returns useful information.
9732
9733   """
9734   _OP_PARAMS = [
9735     _PInstanceName,
9736     ("mode", ht.NoDefault, ht.TElemOf(constants.EXPORT_MODES)),
9737     ]
9738   REQ_BGL = False
9739
9740   def ExpandNames(self):
9741     self._ExpandAndLockInstance()
9742
9743   def CheckPrereq(self):
9744     """Check prerequisites.
9745
9746     """
9747     instance_name = self.op.instance_name
9748
9749     self.instance = self.cfg.GetInstanceInfo(instance_name)
9750     assert self.instance is not None, \
9751           "Cannot retrieve locked instance %s" % self.op.instance_name
9752     _CheckNodeOnline(self, self.instance.primary_node)
9753
9754     self._cds = _GetClusterDomainSecret()
9755
9756   def Exec(self, feedback_fn):
9757     """Prepares an instance for an export.
9758
9759     """
9760     instance = self.instance
9761
9762     if self.op.mode == constants.EXPORT_MODE_REMOTE:
9763       salt = utils.GenerateSecret(8)
9764
9765       feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9766       result = self.rpc.call_x509_cert_create(instance.primary_node,
9767                                               constants.RIE_CERT_VALIDITY)
9768       result.Raise("Can't create X509 key and certificate on %s" % result.node)
9769
9770       (name, cert_pem) = result.payload
9771
9772       cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9773                                              cert_pem)
9774
9775       return {
9776         "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9777         "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9778                           salt),
9779         "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9780         }
9781
9782     return None
9783
9784
9785 class LUExportInstance(LogicalUnit):
9786   """Export an instance to an image in the cluster.
9787
9788   """
9789   HPATH = "instance-export"
9790   HTYPE = constants.HTYPE_INSTANCE
9791   _OP_PARAMS = [
9792     _PInstanceName,
9793     ("target_node", ht.NoDefault, ht.TOr(ht.TNonEmptyString, ht.TList)),
9794     ("shutdown", True, ht.TBool),
9795     _PShutdownTimeout,
9796     ("remove_instance", False, ht.TBool),
9797     ("ignore_remove_failures", False, ht.TBool),
9798     ("mode", constants.EXPORT_MODE_LOCAL, ht.TElemOf(constants.EXPORT_MODES)),
9799     ("x509_key_name", None, ht.TOr(ht.TList, ht.TNone)),
9800     ("destination_x509_ca", None, ht.TMaybeString),
9801     ]
9802   REQ_BGL = False
9803
9804   def CheckArguments(self):
9805     """Check the arguments.
9806
9807     """
9808     self.x509_key_name = self.op.x509_key_name
9809     self.dest_x509_ca_pem = self.op.destination_x509_ca
9810
9811     if self.op.mode == constants.EXPORT_MODE_REMOTE:
9812       if not self.x509_key_name:
9813         raise errors.OpPrereqError("Missing X509 key name for encryption",
9814                                    errors.ECODE_INVAL)
9815
9816       if not self.dest_x509_ca_pem:
9817         raise errors.OpPrereqError("Missing destination X509 CA",
9818                                    errors.ECODE_INVAL)
9819
9820   def ExpandNames(self):
9821     self._ExpandAndLockInstance()
9822
9823     # Lock all nodes for local exports
9824     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9825       # FIXME: lock only instance primary and destination node
9826       #
9827       # Sad but true, for now we have do lock all nodes, as we don't know where
9828       # the previous export might be, and in this LU we search for it and
9829       # remove it from its current node. In the future we could fix this by:
9830       #  - making a tasklet to search (share-lock all), then create the
9831       #    new one, then one to remove, after
9832       #  - removing the removal operation altogether
9833       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9834
9835   def DeclareLocks(self, level):
9836     """Last minute lock declaration."""
9837     # All nodes are locked anyway, so nothing to do here.
9838
9839   def BuildHooksEnv(self):
9840     """Build hooks env.
9841
9842     This will run on the master, primary node and target node.
9843
9844     """
9845     env = {
9846       "EXPORT_MODE": self.op.mode,
9847       "EXPORT_NODE": self.op.target_node,
9848       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9849       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9850       # TODO: Generic function for boolean env variables
9851       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9852       }
9853
9854     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9855
9856     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9857
9858     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9859       nl.append(self.op.target_node)
9860
9861     return env, nl, nl
9862
9863   def CheckPrereq(self):
9864     """Check prerequisites.
9865
9866     This checks that the instance and node names are valid.
9867
9868     """
9869     instance_name = self.op.instance_name
9870
9871     self.instance = self.cfg.GetInstanceInfo(instance_name)
9872     assert self.instance is not None, \
9873           "Cannot retrieve locked instance %s" % self.op.instance_name
9874     _CheckNodeOnline(self, self.instance.primary_node)
9875
9876     if (self.op.remove_instance and self.instance.admin_up and
9877         not self.op.shutdown):
9878       raise errors.OpPrereqError("Can not remove instance without shutting it"
9879                                  " down before")
9880
9881     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9882       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9883       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9884       assert self.dst_node is not None
9885
9886       _CheckNodeOnline(self, self.dst_node.name)
9887       _CheckNodeNotDrained(self, self.dst_node.name)
9888
9889       self._cds = None
9890       self.dest_disk_info = None
9891       self.dest_x509_ca = None
9892
9893     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9894       self.dst_node = None
9895
9896       if len(self.op.target_node) != len(self.instance.disks):
9897         raise errors.OpPrereqError(("Received destination information for %s"
9898                                     " disks, but instance %s has %s disks") %
9899                                    (len(self.op.target_node), instance_name,
9900                                     len(self.instance.disks)),
9901                                    errors.ECODE_INVAL)
9902
9903       cds = _GetClusterDomainSecret()
9904
9905       # Check X509 key name
9906       try:
9907         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9908       except (TypeError, ValueError), err:
9909         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9910
9911       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9912         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9913                                    errors.ECODE_INVAL)
9914
9915       # Load and verify CA
9916       try:
9917         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9918       except OpenSSL.crypto.Error, err:
9919         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9920                                    (err, ), errors.ECODE_INVAL)
9921
9922       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9923       if errcode is not None:
9924         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9925                                    (msg, ), errors.ECODE_INVAL)
9926
9927       self.dest_x509_ca = cert
9928
9929       # Verify target information
9930       disk_info = []
9931       for idx, disk_data in enumerate(self.op.target_node):
9932         try:
9933           (host, port, magic) = \
9934             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9935         except errors.GenericError, err:
9936           raise errors.OpPrereqError("Target info for disk %s: %s" %
9937                                      (idx, err), errors.ECODE_INVAL)
9938
9939         disk_info.append((host, port, magic))
9940
9941       assert len(disk_info) == len(self.op.target_node)
9942       self.dest_disk_info = disk_info
9943
9944     else:
9945       raise errors.ProgrammerError("Unhandled export mode %r" %
9946                                    self.op.mode)
9947
9948     # instance disk type verification
9949     # TODO: Implement export support for file-based disks
9950     for disk in self.instance.disks:
9951       if disk.dev_type == constants.LD_FILE:
9952         raise errors.OpPrereqError("Export not supported for instances with"
9953                                    " file-based disks", errors.ECODE_INVAL)
9954
9955   def _CleanupExports(self, feedback_fn):
9956     """Removes exports of current instance from all other nodes.
9957
9958     If an instance in a cluster with nodes A..D was exported to node C, its
9959     exports will be removed from the nodes A, B and D.
9960
9961     """
9962     assert self.op.mode != constants.EXPORT_MODE_REMOTE
9963
9964     nodelist = self.cfg.GetNodeList()
9965     nodelist.remove(self.dst_node.name)
9966
9967     # on one-node clusters nodelist will be empty after the removal
9968     # if we proceed the backup would be removed because OpQueryExports
9969     # substitutes an empty list with the full cluster node list.
9970     iname = self.instance.name
9971     if nodelist:
9972       feedback_fn("Removing old exports for instance %s" % iname)
9973       exportlist = self.rpc.call_export_list(nodelist)
9974       for node in exportlist:
9975         if exportlist[node].fail_msg:
9976           continue
9977         if iname in exportlist[node].payload:
9978           msg = self.rpc.call_export_remove(node, iname).fail_msg
9979           if msg:
9980             self.LogWarning("Could not remove older export for instance %s"
9981                             " on node %s: %s", iname, node, msg)
9982
9983   def Exec(self, feedback_fn):
9984     """Export an instance to an image in the cluster.
9985
9986     """
9987     assert self.op.mode in constants.EXPORT_MODES
9988
9989     instance = self.instance
9990     src_node = instance.primary_node
9991
9992     if self.op.shutdown:
9993       # shutdown the instance, but not the disks
9994       feedback_fn("Shutting down instance %s" % instance.name)
9995       result = self.rpc.call_instance_shutdown(src_node, instance,
9996                                                self.op.shutdown_timeout)
9997       # TODO: Maybe ignore failures if ignore_remove_failures is set
9998       result.Raise("Could not shutdown instance %s on"
9999                    " node %s" % (instance.name, src_node))
10000
10001     # set the disks ID correctly since call_instance_start needs the
10002     # correct drbd minor to create the symlinks
10003     for disk in instance.disks:
10004       self.cfg.SetDiskID(disk, src_node)
10005
10006     activate_disks = (not instance.admin_up)
10007
10008     if activate_disks:
10009       # Activate the instance disks if we'exporting a stopped instance
10010       feedback_fn("Activating disks for %s" % instance.name)
10011       _StartInstanceDisks(self, instance, None)
10012
10013     try:
10014       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
10015                                                      instance)
10016
10017       helper.CreateSnapshots()
10018       try:
10019         if (self.op.shutdown and instance.admin_up and
10020             not self.op.remove_instance):
10021           assert not activate_disks
10022           feedback_fn("Starting instance %s" % instance.name)
10023           result = self.rpc.call_instance_start(src_node, instance, None, None)
10024           msg = result.fail_msg
10025           if msg:
10026             feedback_fn("Failed to start instance: %s" % msg)
10027             _ShutdownInstanceDisks(self, instance)
10028             raise errors.OpExecError("Could not start instance: %s" % msg)
10029
10030         if self.op.mode == constants.EXPORT_MODE_LOCAL:
10031           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
10032         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10033           connect_timeout = constants.RIE_CONNECT_TIMEOUT
10034           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10035
10036           (key_name, _, _) = self.x509_key_name
10037
10038           dest_ca_pem = \
10039             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
10040                                             self.dest_x509_ca)
10041
10042           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
10043                                                      key_name, dest_ca_pem,
10044                                                      timeouts)
10045       finally:
10046         helper.Cleanup()
10047
10048       # Check for backwards compatibility
10049       assert len(dresults) == len(instance.disks)
10050       assert compat.all(isinstance(i, bool) for i in dresults), \
10051              "Not all results are boolean: %r" % dresults
10052
10053     finally:
10054       if activate_disks:
10055         feedback_fn("Deactivating disks for %s" % instance.name)
10056         _ShutdownInstanceDisks(self, instance)
10057
10058     if not (compat.all(dresults) and fin_resu):
10059       failures = []
10060       if not fin_resu:
10061         failures.append("export finalization")
10062       if not compat.all(dresults):
10063         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
10064                                if not dsk)
10065         failures.append("disk export: disk(s) %s" % fdsk)
10066
10067       raise errors.OpExecError("Export failed, errors in %s" %
10068                                utils.CommaJoin(failures))
10069
10070     # At this point, the export was successful, we can cleanup/finish
10071
10072     # Remove instance if requested
10073     if self.op.remove_instance:
10074       feedback_fn("Removing instance %s" % instance.name)
10075       _RemoveInstance(self, feedback_fn, instance,
10076                       self.op.ignore_remove_failures)
10077
10078     if self.op.mode == constants.EXPORT_MODE_LOCAL:
10079       self._CleanupExports(feedback_fn)
10080
10081     return fin_resu, dresults
10082
10083
10084 class LURemoveExport(NoHooksLU):
10085   """Remove exports related to the named instance.
10086
10087   """
10088   _OP_PARAMS = [
10089     _PInstanceName,
10090     ]
10091   REQ_BGL = False
10092
10093   def ExpandNames(self):
10094     self.needed_locks = {}
10095     # We need all nodes to be locked in order for RemoveExport to work, but we
10096     # don't need to lock the instance itself, as nothing will happen to it (and
10097     # we can remove exports also for a removed instance)
10098     self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10099
10100   def Exec(self, feedback_fn):
10101     """Remove any export.
10102
10103     """
10104     instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
10105     # If the instance was not found we'll try with the name that was passed in.
10106     # This will only work if it was an FQDN, though.
10107     fqdn_warn = False
10108     if not instance_name:
10109       fqdn_warn = True
10110       instance_name = self.op.instance_name
10111
10112     locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
10113     exportlist = self.rpc.call_export_list(locked_nodes)
10114     found = False
10115     for node in exportlist:
10116       msg = exportlist[node].fail_msg
10117       if msg:
10118         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
10119         continue
10120       if instance_name in exportlist[node].payload:
10121         found = True
10122         result = self.rpc.call_export_remove(node, instance_name)
10123         msg = result.fail_msg
10124         if msg:
10125           logging.error("Could not remove export for instance %s"
10126                         " on node %s: %s", instance_name, node, msg)
10127
10128     if fqdn_warn and not found:
10129       feedback_fn("Export not found. If trying to remove an export belonging"
10130                   " to a deleted instance please use its Fully Qualified"
10131                   " Domain Name.")
10132
10133
10134 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
10135   """Generic tags LU.
10136
10137   This is an abstract class which is the parent of all the other tags LUs.
10138
10139   """
10140
10141   def ExpandNames(self):
10142     self.needed_locks = {}
10143     if self.op.kind == constants.TAG_NODE:
10144       self.op.name = _ExpandNodeName(self.cfg, self.op.name)
10145       self.needed_locks[locking.LEVEL_NODE] = self.op.name
10146     elif self.op.kind == constants.TAG_INSTANCE:
10147       self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
10148       self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
10149
10150     # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
10151     # not possible to acquire the BGL based on opcode parameters)
10152
10153   def CheckPrereq(self):
10154     """Check prerequisites.
10155
10156     """
10157     if self.op.kind == constants.TAG_CLUSTER:
10158       self.target = self.cfg.GetClusterInfo()
10159     elif self.op.kind == constants.TAG_NODE:
10160       self.target = self.cfg.GetNodeInfo(self.op.name)
10161     elif self.op.kind == constants.TAG_INSTANCE:
10162       self.target = self.cfg.GetInstanceInfo(self.op.name)
10163     else:
10164       raise errors.OpPrereqError("Wrong tag type requested (%s)" %
10165                                  str(self.op.kind), errors.ECODE_INVAL)
10166
10167
10168 class LUGetTags(TagsLU):
10169   """Returns the tags of a given object.
10170
10171   """
10172   _OP_PARAMS = [
10173     ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
10174     # Name is only meaningful for nodes and instances
10175     ("name", ht.NoDefault, ht.TMaybeString),
10176     ]
10177   REQ_BGL = False
10178
10179   def ExpandNames(self):
10180     TagsLU.ExpandNames(self)
10181
10182     # Share locks as this is only a read operation
10183     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
10184
10185   def Exec(self, feedback_fn):
10186     """Returns the tag list.
10187
10188     """
10189     return list(self.target.GetTags())
10190
10191
10192 class LUSearchTags(NoHooksLU):
10193   """Searches the tags for a given pattern.
10194
10195   """
10196   _OP_PARAMS = [
10197     ("pattern", ht.NoDefault, ht.TNonEmptyString),
10198     ]
10199   REQ_BGL = False
10200
10201   def ExpandNames(self):
10202     self.needed_locks = {}
10203
10204   def CheckPrereq(self):
10205     """Check prerequisites.
10206
10207     This checks the pattern passed for validity by compiling it.
10208
10209     """
10210     try:
10211       self.re = re.compile(self.op.pattern)
10212     except re.error, err:
10213       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
10214                                  (self.op.pattern, err), errors.ECODE_INVAL)
10215
10216   def Exec(self, feedback_fn):
10217     """Returns the tag list.
10218
10219     """
10220     cfg = self.cfg
10221     tgts = [("/cluster", cfg.GetClusterInfo())]
10222     ilist = cfg.GetAllInstancesInfo().values()
10223     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
10224     nlist = cfg.GetAllNodesInfo().values()
10225     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
10226     results = []
10227     for path, target in tgts:
10228       for tag in target.GetTags():
10229         if self.re.search(tag):
10230           results.append((path, tag))
10231     return results
10232
10233
10234 class LUAddTags(TagsLU):
10235   """Sets a tag on a given object.
10236
10237   """
10238   _OP_PARAMS = [
10239     ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
10240     # Name is only meaningful for nodes and instances
10241     ("name", ht.NoDefault, ht.TMaybeString),
10242     ("tags", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
10243     ]
10244   REQ_BGL = False
10245
10246   def CheckPrereq(self):
10247     """Check prerequisites.
10248
10249     This checks the type and length of the tag name and value.
10250
10251     """
10252     TagsLU.CheckPrereq(self)
10253     for tag in self.op.tags:
10254       objects.TaggableObject.ValidateTag(tag)
10255
10256   def Exec(self, feedback_fn):
10257     """Sets the tag.
10258
10259     """
10260     try:
10261       for tag in self.op.tags:
10262         self.target.AddTag(tag)
10263     except errors.TagError, err:
10264       raise errors.OpExecError("Error while setting tag: %s" % str(err))
10265     self.cfg.Update(self.target, feedback_fn)
10266
10267
10268 class LUDelTags(TagsLU):
10269   """Delete a list of tags from a given object.
10270
10271   """
10272   _OP_PARAMS = [
10273     ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
10274     # Name is only meaningful for nodes and instances
10275     ("name", ht.NoDefault, ht.TMaybeString),
10276     ("tags", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
10277     ]
10278   REQ_BGL = False
10279
10280   def CheckPrereq(self):
10281     """Check prerequisites.
10282
10283     This checks that we have the given tag.
10284
10285     """
10286     TagsLU.CheckPrereq(self)
10287     for tag in self.op.tags:
10288       objects.TaggableObject.ValidateTag(tag)
10289     del_tags = frozenset(self.op.tags)
10290     cur_tags = self.target.GetTags()
10291
10292     diff_tags = del_tags - cur_tags
10293     if diff_tags:
10294       diff_names = ("'%s'" % i for i in sorted(diff_tags))
10295       raise errors.OpPrereqError("Tag(s) %s not found" %
10296                                  (utils.CommaJoin(diff_names), ),
10297                                  errors.ECODE_NOENT)
10298
10299   def Exec(self, feedback_fn):
10300     """Remove the tag from the object.
10301
10302     """
10303     for tag in self.op.tags:
10304       self.target.RemoveTag(tag)
10305     self.cfg.Update(self.target, feedback_fn)
10306
10307
10308 class LUTestDelay(NoHooksLU):
10309   """Sleep for a specified amount of time.
10310
10311   This LU sleeps on the master and/or nodes for a specified amount of
10312   time.
10313
10314   """
10315   _OP_PARAMS = [
10316     ("duration", ht.NoDefault, ht.TFloat),
10317     ("on_master", True, ht.TBool),
10318     ("on_nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
10319     ("repeat", 0, ht.TPositiveInt)
10320     ]
10321   REQ_BGL = False
10322
10323   def ExpandNames(self):
10324     """Expand names and set required locks.
10325
10326     This expands the node list, if any.
10327
10328     """
10329     self.needed_locks = {}
10330     if self.op.on_nodes:
10331       # _GetWantedNodes can be used here, but is not always appropriate to use
10332       # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
10333       # more information.
10334       self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
10335       self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
10336
10337   def _TestDelay(self):
10338     """Do the actual sleep.
10339
10340     """
10341     if self.op.on_master:
10342       if not utils.TestDelay(self.op.duration):
10343         raise errors.OpExecError("Error during master delay test")
10344     if self.op.on_nodes:
10345       result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
10346       for node, node_result in result.items():
10347         node_result.Raise("Failure during rpc call to node %s" % node)
10348
10349   def Exec(self, feedback_fn):
10350     """Execute the test delay opcode, with the wanted repetitions.
10351
10352     """
10353     if self.op.repeat == 0:
10354       self._TestDelay()
10355     else:
10356       top_value = self.op.repeat - 1
10357       for i in range(self.op.repeat):
10358         self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
10359         self._TestDelay()
10360
10361
10362 class LUTestJobqueue(NoHooksLU):
10363   """Utility LU to test some aspects of the job queue.
10364
10365   """
10366   _OP_PARAMS = [
10367     ("notify_waitlock", False, ht.TBool),
10368     ("notify_exec", False, ht.TBool),
10369     ("log_messages", ht.EmptyList, ht.TListOf(ht.TString)),
10370     ("fail", False, ht.TBool),
10371     ]
10372   REQ_BGL = False
10373
10374   # Must be lower than default timeout for WaitForJobChange to see whether it
10375   # notices changed jobs
10376   _CLIENT_CONNECT_TIMEOUT = 20.0
10377   _CLIENT_CONFIRM_TIMEOUT = 60.0
10378
10379   @classmethod
10380   def _NotifyUsingSocket(cls, cb, errcls):
10381     """Opens a Unix socket and waits for another program to connect.
10382
10383     @type cb: callable
10384     @param cb: Callback to send socket name to client
10385     @type errcls: class
10386     @param errcls: Exception class to use for errors
10387
10388     """
10389     # Using a temporary directory as there's no easy way to create temporary
10390     # sockets without writing a custom loop around tempfile.mktemp and
10391     # socket.bind
10392     tmpdir = tempfile.mkdtemp()
10393     try:
10394       tmpsock = utils.PathJoin(tmpdir, "sock")
10395
10396       logging.debug("Creating temporary socket at %s", tmpsock)
10397       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
10398       try:
10399         sock.bind(tmpsock)
10400         sock.listen(1)
10401
10402         # Send details to client
10403         cb(tmpsock)
10404
10405         # Wait for client to connect before continuing
10406         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
10407         try:
10408           (conn, _) = sock.accept()
10409         except socket.error, err:
10410           raise errcls("Client didn't connect in time (%s)" % err)
10411       finally:
10412         sock.close()
10413     finally:
10414       # Remove as soon as client is connected
10415       shutil.rmtree(tmpdir)
10416
10417     # Wait for client to close
10418     try:
10419       try:
10420         # pylint: disable-msg=E1101
10421         # Instance of '_socketobject' has no ... member
10422         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
10423         conn.recv(1)
10424       except socket.error, err:
10425         raise errcls("Client failed to confirm notification (%s)" % err)
10426     finally:
10427       conn.close()
10428
10429   def _SendNotification(self, test, arg, sockname):
10430     """Sends a notification to the client.
10431
10432     @type test: string
10433     @param test: Test name
10434     @param arg: Test argument (depends on test)
10435     @type sockname: string
10436     @param sockname: Socket path
10437
10438     """
10439     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
10440
10441   def _Notify(self, prereq, test, arg):
10442     """Notifies the client of a test.
10443
10444     @type prereq: bool
10445     @param prereq: Whether this is a prereq-phase test
10446     @type test: string
10447     @param test: Test name
10448     @param arg: Test argument (depends on test)
10449
10450     """
10451     if prereq:
10452       errcls = errors.OpPrereqError
10453     else:
10454       errcls = errors.OpExecError
10455
10456     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
10457                                                   test, arg),
10458                                    errcls)
10459
10460   def CheckArguments(self):
10461     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
10462     self.expandnames_calls = 0
10463
10464   def ExpandNames(self):
10465     checkargs_calls = getattr(self, "checkargs_calls", 0)
10466     if checkargs_calls < 1:
10467       raise errors.ProgrammerError("CheckArguments was not called")
10468
10469     self.expandnames_calls += 1
10470
10471     if self.op.notify_waitlock:
10472       self._Notify(True, constants.JQT_EXPANDNAMES, None)
10473
10474     self.LogInfo("Expanding names")
10475
10476     # Get lock on master node (just to get a lock, not for a particular reason)
10477     self.needed_locks = {
10478       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
10479       }
10480
10481   def Exec(self, feedback_fn):
10482     if self.expandnames_calls < 1:
10483       raise errors.ProgrammerError("ExpandNames was not called")
10484
10485     if self.op.notify_exec:
10486       self._Notify(False, constants.JQT_EXEC, None)
10487
10488     self.LogInfo("Executing")
10489
10490     if self.op.log_messages:
10491       self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
10492       for idx, msg in enumerate(self.op.log_messages):
10493         self.LogInfo("Sending log message %s", idx + 1)
10494         feedback_fn(constants.JQT_MSGPREFIX + msg)
10495         # Report how many test messages have been sent
10496         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
10497
10498     if self.op.fail:
10499       raise errors.OpExecError("Opcode failure was requested")
10500
10501     return True
10502
10503
10504 class IAllocator(object):
10505   """IAllocator framework.
10506
10507   An IAllocator instance has three sets of attributes:
10508     - cfg that is needed to query the cluster
10509     - input data (all members of the _KEYS class attribute are required)
10510     - four buffer attributes (in|out_data|text), that represent the
10511       input (to the external script) in text and data structure format,
10512       and the output from it, again in two formats
10513     - the result variables from the script (success, info, nodes) for
10514       easy usage
10515
10516   """
10517   # pylint: disable-msg=R0902
10518   # lots of instance attributes
10519   _ALLO_KEYS = [
10520     "name", "mem_size", "disks", "disk_template",
10521     "os", "tags", "nics", "vcpus", "hypervisor",
10522     ]
10523   _RELO_KEYS = [
10524     "name", "relocate_from",
10525     ]
10526   _EVAC_KEYS = [
10527     "evac_nodes",
10528     ]
10529
10530   def __init__(self, cfg, rpc, mode, **kwargs):
10531     self.cfg = cfg
10532     self.rpc = rpc
10533     # init buffer variables
10534     self.in_text = self.out_text = self.in_data = self.out_data = None
10535     # init all input fields so that pylint is happy
10536     self.mode = mode
10537     self.mem_size = self.disks = self.disk_template = None
10538     self.os = self.tags = self.nics = self.vcpus = None
10539     self.hypervisor = None
10540     self.relocate_from = None
10541     self.name = None
10542     self.evac_nodes = None
10543     # computed fields
10544     self.required_nodes = None
10545     # init result fields
10546     self.success = self.info = self.result = None
10547     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10548       keyset = self._ALLO_KEYS
10549       fn = self._AddNewInstance
10550     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10551       keyset = self._RELO_KEYS
10552       fn = self._AddRelocateInstance
10553     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10554       keyset = self._EVAC_KEYS
10555       fn = self._AddEvacuateNodes
10556     else:
10557       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
10558                                    " IAllocator" % self.mode)
10559     for key in kwargs:
10560       if key not in keyset:
10561         raise errors.ProgrammerError("Invalid input parameter '%s' to"
10562                                      " IAllocator" % key)
10563       setattr(self, key, kwargs[key])
10564
10565     for key in keyset:
10566       if key not in kwargs:
10567         raise errors.ProgrammerError("Missing input parameter '%s' to"
10568                                      " IAllocator" % key)
10569     self._BuildInputData(fn)
10570
10571   def _ComputeClusterData(self):
10572     """Compute the generic allocator input data.
10573
10574     This is the data that is independent of the actual operation.
10575
10576     """
10577     cfg = self.cfg
10578     cluster_info = cfg.GetClusterInfo()
10579     # cluster data
10580     data = {
10581       "version": constants.IALLOCATOR_VERSION,
10582       "cluster_name": cfg.GetClusterName(),
10583       "cluster_tags": list(cluster_info.GetTags()),
10584       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
10585       # we don't have job IDs
10586       }
10587     iinfo = cfg.GetAllInstancesInfo().values()
10588     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
10589
10590     # node data
10591     node_list = cfg.GetNodeList()
10592
10593     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10594       hypervisor_name = self.hypervisor
10595     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10596       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
10597     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10598       hypervisor_name = cluster_info.enabled_hypervisors[0]
10599
10600     node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
10601                                         hypervisor_name)
10602     node_iinfo = \
10603       self.rpc.call_all_instances_info(node_list,
10604                                        cluster_info.enabled_hypervisors)
10605
10606     data["nodegroups"] = self._ComputeNodeGroupData(cfg)
10607
10608     data["nodes"] = self._ComputeNodeData(cfg, node_data, node_iinfo, i_list)
10609
10610     data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
10611
10612     self.in_data = data
10613
10614   @staticmethod
10615   def _ComputeNodeGroupData(cfg):
10616     """Compute node groups data.
10617
10618     """
10619     ng = {}
10620     for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
10621       ng[guuid] = { "name": gdata.name }
10622     return ng
10623
10624   @staticmethod
10625   def _ComputeNodeData(cfg, node_data, node_iinfo, i_list):
10626     """Compute global node data.
10627
10628     """
10629     node_results = {}
10630     for nname, nresult in node_data.items():
10631       # first fill in static (config-based) values
10632       ninfo = cfg.GetNodeInfo(nname)
10633       pnr = {
10634         "tags": list(ninfo.GetTags()),
10635         "primary_ip": ninfo.primary_ip,
10636         "secondary_ip": ninfo.secondary_ip,
10637         "offline": ninfo.offline,
10638         "drained": ninfo.drained,
10639         "master_candidate": ninfo.master_candidate,
10640         "group": ninfo.group,
10641         "master_capable": ninfo.master_capable,
10642         "vm_capable": ninfo.vm_capable,
10643         }
10644
10645       if not (ninfo.offline or ninfo.drained):
10646         nresult.Raise("Can't get data for node %s" % nname)
10647         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
10648                                 nname)
10649         remote_info = nresult.payload
10650
10651         for attr in ['memory_total', 'memory_free', 'memory_dom0',
10652                      'vg_size', 'vg_free', 'cpu_total']:
10653           if attr not in remote_info:
10654             raise errors.OpExecError("Node '%s' didn't return attribute"
10655                                      " '%s'" % (nname, attr))
10656           if not isinstance(remote_info[attr], int):
10657             raise errors.OpExecError("Node '%s' returned invalid value"
10658                                      " for '%s': %s" %
10659                                      (nname, attr, remote_info[attr]))
10660         # compute memory used by primary instances
10661         i_p_mem = i_p_up_mem = 0
10662         for iinfo, beinfo in i_list:
10663           if iinfo.primary_node == nname:
10664             i_p_mem += beinfo[constants.BE_MEMORY]
10665             if iinfo.name not in node_iinfo[nname].payload:
10666               i_used_mem = 0
10667             else:
10668               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
10669             i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
10670             remote_info['memory_free'] -= max(0, i_mem_diff)
10671
10672             if iinfo.admin_up:
10673               i_p_up_mem += beinfo[constants.BE_MEMORY]
10674
10675         # compute memory used by instances
10676         pnr_dyn = {
10677           "total_memory": remote_info['memory_total'],
10678           "reserved_memory": remote_info['memory_dom0'],
10679           "free_memory": remote_info['memory_free'],
10680           "total_disk": remote_info['vg_size'],
10681           "free_disk": remote_info['vg_free'],
10682           "total_cpus": remote_info['cpu_total'],
10683           "i_pri_memory": i_p_mem,
10684           "i_pri_up_memory": i_p_up_mem,
10685           }
10686         pnr.update(pnr_dyn)
10687
10688       node_results[nname] = pnr
10689
10690     return node_results
10691
10692   @staticmethod
10693   def _ComputeInstanceData(cluster_info, i_list):
10694     """Compute global instance data.
10695
10696     """
10697     instance_data = {}
10698     for iinfo, beinfo in i_list:
10699       nic_data = []
10700       for nic in iinfo.nics:
10701         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
10702         nic_dict = {"mac": nic.mac,
10703                     "ip": nic.ip,
10704                     "mode": filled_params[constants.NIC_MODE],
10705                     "link": filled_params[constants.NIC_LINK],
10706                    }
10707         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
10708           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
10709         nic_data.append(nic_dict)
10710       pir = {
10711         "tags": list(iinfo.GetTags()),
10712         "admin_up": iinfo.admin_up,
10713         "vcpus": beinfo[constants.BE_VCPUS],
10714         "memory": beinfo[constants.BE_MEMORY],
10715         "os": iinfo.os,
10716         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
10717         "nics": nic_data,
10718         "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
10719         "disk_template": iinfo.disk_template,
10720         "hypervisor": iinfo.hypervisor,
10721         }
10722       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
10723                                                  pir["disks"])
10724       instance_data[iinfo.name] = pir
10725
10726     return instance_data
10727
10728   def _AddNewInstance(self):
10729     """Add new instance data to allocator structure.
10730
10731     This in combination with _AllocatorGetClusterData will create the
10732     correct structure needed as input for the allocator.
10733
10734     The checks for the completeness of the opcode must have already been
10735     done.
10736
10737     """
10738     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
10739
10740     if self.disk_template in constants.DTS_NET_MIRROR:
10741       self.required_nodes = 2
10742     else:
10743       self.required_nodes = 1
10744     request = {
10745       "name": self.name,
10746       "disk_template": self.disk_template,
10747       "tags": self.tags,
10748       "os": self.os,
10749       "vcpus": self.vcpus,
10750       "memory": self.mem_size,
10751       "disks": self.disks,
10752       "disk_space_total": disk_space,
10753       "nics": self.nics,
10754       "required_nodes": self.required_nodes,
10755       }
10756     return request
10757
10758   def _AddRelocateInstance(self):
10759     """Add relocate instance data to allocator structure.
10760
10761     This in combination with _IAllocatorGetClusterData will create the
10762     correct structure needed as input for the allocator.
10763
10764     The checks for the completeness of the opcode must have already been
10765     done.
10766
10767     """
10768     instance = self.cfg.GetInstanceInfo(self.name)
10769     if instance is None:
10770       raise errors.ProgrammerError("Unknown instance '%s' passed to"
10771                                    " IAllocator" % self.name)
10772
10773     if instance.disk_template not in constants.DTS_NET_MIRROR:
10774       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
10775                                  errors.ECODE_INVAL)
10776
10777     if len(instance.secondary_nodes) != 1:
10778       raise errors.OpPrereqError("Instance has not exactly one secondary node",
10779                                  errors.ECODE_STATE)
10780
10781     self.required_nodes = 1
10782     disk_sizes = [{'size': disk.size} for disk in instance.disks]
10783     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
10784
10785     request = {
10786       "name": self.name,
10787       "disk_space_total": disk_space,
10788       "required_nodes": self.required_nodes,
10789       "relocate_from": self.relocate_from,
10790       }
10791     return request
10792
10793   def _AddEvacuateNodes(self):
10794     """Add evacuate nodes data to allocator structure.
10795
10796     """
10797     request = {
10798       "evac_nodes": self.evac_nodes
10799       }
10800     return request
10801
10802   def _BuildInputData(self, fn):
10803     """Build input data structures.
10804
10805     """
10806     self._ComputeClusterData()
10807
10808     request = fn()
10809     request["type"] = self.mode
10810     self.in_data["request"] = request
10811
10812     self.in_text = serializer.Dump(self.in_data)
10813
10814   def Run(self, name, validate=True, call_fn=None):
10815     """Run an instance allocator and return the results.
10816
10817     """
10818     if call_fn is None:
10819       call_fn = self.rpc.call_iallocator_runner
10820
10821     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
10822     result.Raise("Failure while running the iallocator script")
10823
10824     self.out_text = result.payload
10825     if validate:
10826       self._ValidateResult()
10827
10828   def _ValidateResult(self):
10829     """Process the allocator results.
10830
10831     This will process and if successful save the result in
10832     self.out_data and the other parameters.
10833
10834     """
10835     try:
10836       rdict = serializer.Load(self.out_text)
10837     except Exception, err:
10838       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
10839
10840     if not isinstance(rdict, dict):
10841       raise errors.OpExecError("Can't parse iallocator results: not a dict")
10842
10843     # TODO: remove backwards compatiblity in later versions
10844     if "nodes" in rdict and "result" not in rdict:
10845       rdict["result"] = rdict["nodes"]
10846       del rdict["nodes"]
10847
10848     for key in "success", "info", "result":
10849       if key not in rdict:
10850         raise errors.OpExecError("Can't parse iallocator results:"
10851                                  " missing key '%s'" % key)
10852       setattr(self, key, rdict[key])
10853
10854     if not isinstance(rdict["result"], list):
10855       raise errors.OpExecError("Can't parse iallocator results: 'result' key"
10856                                " is not a list")
10857     self.out_data = rdict
10858
10859
10860 class LUTestAllocator(NoHooksLU):
10861   """Run allocator tests.
10862
10863   This LU runs the allocator tests
10864
10865   """
10866   _OP_PARAMS = [
10867     ("direction", ht.NoDefault,
10868      ht.TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
10869     ("mode", ht.NoDefault, ht.TElemOf(constants.VALID_IALLOCATOR_MODES)),
10870     ("name", ht.NoDefault, ht.TNonEmptyString),
10871     ("nics", ht.NoDefault, ht.TOr(ht.TNone, ht.TListOf(
10872       ht.TDictOf(ht.TElemOf(["mac", "ip", "bridge"]),
10873                ht.TOr(ht.TNone, ht.TNonEmptyString))))),
10874     ("disks", ht.NoDefault, ht.TOr(ht.TNone, ht.TList)),
10875     ("hypervisor", None, ht.TMaybeString),
10876     ("allocator", None, ht.TMaybeString),
10877     ("tags", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
10878     ("mem_size", None, ht.TOr(ht.TNone, ht.TPositiveInt)),
10879     ("vcpus", None, ht.TOr(ht.TNone, ht.TPositiveInt)),
10880     ("os", None, ht.TMaybeString),
10881     ("disk_template", None, ht.TMaybeString),
10882     ("evac_nodes", None, ht.TOr(ht.TNone, ht.TListOf(ht.TNonEmptyString))),
10883     ]
10884
10885   def CheckPrereq(self):
10886     """Check prerequisites.
10887
10888     This checks the opcode parameters depending on the director and mode test.
10889
10890     """
10891     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10892       for attr in ["mem_size", "disks", "disk_template",
10893                    "os", "tags", "nics", "vcpus"]:
10894         if not hasattr(self.op, attr):
10895           raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
10896                                      attr, errors.ECODE_INVAL)
10897       iname = self.cfg.ExpandInstanceName(self.op.name)
10898       if iname is not None:
10899         raise errors.OpPrereqError("Instance '%s' already in the cluster" %
10900                                    iname, errors.ECODE_EXISTS)
10901       if not isinstance(self.op.nics, list):
10902         raise errors.OpPrereqError("Invalid parameter 'nics'",
10903                                    errors.ECODE_INVAL)
10904       if not isinstance(self.op.disks, list):
10905         raise errors.OpPrereqError("Invalid parameter 'disks'",
10906                                    errors.ECODE_INVAL)
10907       for row in self.op.disks:
10908         if (not isinstance(row, dict) or
10909             "size" not in row or
10910             not isinstance(row["size"], int) or
10911             "mode" not in row or
10912             row["mode"] not in ['r', 'w']):
10913           raise errors.OpPrereqError("Invalid contents of the 'disks'"
10914                                      " parameter", errors.ECODE_INVAL)
10915       if self.op.hypervisor is None:
10916         self.op.hypervisor = self.cfg.GetHypervisorType()
10917     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10918       fname = _ExpandInstanceName(self.cfg, self.op.name)
10919       self.op.name = fname
10920       self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
10921     elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10922       if not hasattr(self.op, "evac_nodes"):
10923         raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
10924                                    " opcode input", errors.ECODE_INVAL)
10925     else:
10926       raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
10927                                  self.op.mode, errors.ECODE_INVAL)
10928
10929     if self.op.direction == constants.IALLOCATOR_DIR_OUT:
10930       if self.op.allocator is None:
10931         raise errors.OpPrereqError("Missing allocator name",
10932                                    errors.ECODE_INVAL)
10933     elif self.op.direction != constants.IALLOCATOR_DIR_IN:
10934       raise errors.OpPrereqError("Wrong allocator test '%s'" %
10935                                  self.op.direction, errors.ECODE_INVAL)
10936
10937   def Exec(self, feedback_fn):
10938     """Run the allocator test.
10939
10940     """
10941     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10942       ial = IAllocator(self.cfg, self.rpc,
10943                        mode=self.op.mode,
10944                        name=self.op.name,
10945                        mem_size=self.op.mem_size,
10946                        disks=self.op.disks,
10947                        disk_template=self.op.disk_template,
10948                        os=self.op.os,
10949                        tags=self.op.tags,
10950                        nics=self.op.nics,
10951                        vcpus=self.op.vcpus,
10952                        hypervisor=self.op.hypervisor,
10953                        )
10954     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10955       ial = IAllocator(self.cfg, self.rpc,
10956                        mode=self.op.mode,
10957                        name=self.op.name,
10958                        relocate_from=list(self.relocate_from),
10959                        )
10960     elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10961       ial = IAllocator(self.cfg, self.rpc,
10962                        mode=self.op.mode,
10963                        evac_nodes=self.op.evac_nodes)
10964     else:
10965       raise errors.ProgrammerError("Uncatched mode %s in"
10966                                    " LUTestAllocator.Exec", self.op.mode)
10967
10968     if self.op.direction == constants.IALLOCATOR_DIR_IN:
10969       result = ial.in_text
10970     else:
10971       ial.Run(self.op.allocator, validate=False)
10972       result = ial.out_text
10973     return result