code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable-msg=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay to many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import platform
  36 import logging
  37 import copy
  38 import OpenSSL
  39 import socket
  40 import tempfile
  41 import shutil
  42 import itertools
  43
  44 from ganeti import ssh
  45 from ganeti import utils
  46 from ganeti import errors
  47 from ganeti import hypervisor
  48 from ganeti import locking
  49 from ganeti import constants
  50 from ganeti import objects
  51 from ganeti import serializer
  52 from ganeti import ssconf
  53 from ganeti import uidpool
  54 from ganeti import compat
  55 from ganeti import masterd
  56 from ganeti import netutils
  57 from ganeti import ht
  58
  59 import ganeti.masterd.instance # pylint: disable-msg=W0611
  60
# Common opcode attributes
#
# Every definition below is a (name, default, check) triple, i.e. the same
# shape LogicalUnit.__init__ unpacks from each _OP_PARAMS entry: the opcode
# attribute name, the value to use when the attribute is missing
# (ht.NoDefault makes LogicalUnit.__init__ reject an opcode lacking the
# attribute) and the callable the final value must satisfy.
# NOTE(review): presumably these are meant to be listed in the _OP_PARAMS of
# the individual LUs; their users are not visible in this part of the file.

#: output fields for a query operation (required, list of non-empty strings)
_POutputFields = ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString))


#: the shutdown timeout (defaults to constants.DEFAULT_SHUTDOWN_TIMEOUT)
_PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
                     ht.TPositiveInt)

#: the force parameter (boolean, defaults to False)
_PForce = ("force", False, ht.TBool)

#: a required instance name (for single-instance LUs)
_PInstanceName = ("instance_name", ht.NoDefault, ht.TNonEmptyString)

#: Whether to ignore offline nodes (boolean, defaults to False)
_PIgnoreOfflineNodes = ("ignore_offline_nodes", False, ht.TBool)

#: a required node name (for single-node LUs)
_PNodeName = ("node_name", ht.NoDefault, ht.TNonEmptyString)

#: the migration type (live/non-live); None or one of
#: constants.HT_MIGRATION_MODES
_PMigrationMode = ("mode", None,
                   ht.TOr(ht.TNone, ht.TElemOf(constants.HT_MIGRATION_MODES)))

#: the obsolete 'live' mode (boolean or None)
_PMigrationLive = ("live", None, ht.TMaybeBool)
  89
  90
  91 # End types
  92 class LogicalUnit(object):
  93   """Logical Unit base class.
  94
  95   Subclasses must follow these rules:
  96     - implement ExpandNames
  97     - implement CheckPrereq (except when tasklets are used)
  98     - implement Exec (except when tasklets are used)
  99     - implement BuildHooksEnv
 100     - redefine HPATH and HTYPE
 101     - optionally redefine their run requirements:
 102         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
 103
 104   Note that all commands require root permissions.
 105
 106   @ivar dry_run_result: the value (if any) that will be returned to the caller
 107       in dry-run mode (signalled by opcode dry_run parameter)
 108   @cvar _OP_PARAMS: a list of opcode attributes, their defaults values
 109       they should get if not already defined, and types they must match
 110
 111   """
 112   HPATH = None
 113   HTYPE = None
 114   _OP_PARAMS = []
 115   REQ_BGL = True
 116
 117   def __init__(self, processor, op, context, rpc):
 118     """Constructor for LogicalUnit.
 119
 120     This needs to be overridden in derived classes in order to check op
 121     validity.
 122
 123     """
 124     self.proc = processor
 125     self.op = op
 126     self.cfg = context.cfg
 127     self.context = context
 128     self.rpc = rpc
 129     # Dicts used to declare locking needs to mcpu
 130     self.needed_locks = None
 131     self.acquired_locks = {}
 132     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 133     self.add_locks = {}
 134     self.remove_locks = {}
 135     # Used to force good behavior when calling helper functions
 136     self.recalculate_locks = {}
 137     self.__ssh = None
 138     # logging
 139     self.Log = processor.Log # pylint: disable-msg=C0103
 140     self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
 141     self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
 142     self.LogStep = processor.LogStep # pylint: disable-msg=C0103
 143     # support for dry-run
 144     self.dry_run_result = None
 145     # support for generic debug attribute
 146     if (not hasattr(self.op, "debug_level") or
 147         not isinstance(self.op.debug_level, int)):
 148       self.op.debug_level = 0
 149
 150     # Tasklets
 151     self.tasklets = None
 152
 153     # The new kind-of-type-system
 154     op_id = self.op.OP_ID
 155     for attr_name, aval, test in self._OP_PARAMS:
 156       if not hasattr(op, attr_name):
 157         if aval == ht.NoDefault:
 158           raise errors.OpPrereqError("Required parameter '%s.%s' missing" %
 159                                      (op_id, attr_name), errors.ECODE_INVAL)
 160         else:
 161           if callable(aval):
 162             dval = aval()
 163           else:
 164             dval = aval
 165           setattr(self.op, attr_name, dval)
 166       attr_val = getattr(op, attr_name)
 167       if test == ht.NoType:
 168         # no tests here
 169         continue
 170       if not callable(test):
 171         raise errors.ProgrammerError("Validation for parameter '%s.%s' failed,"
 172                                      " given type is not a proper type (%s)" %
 173                                      (op_id, attr_name, test))
 174       if not test(attr_val):
 175         logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
 176                       self.op.OP_ID, attr_name, type(attr_val), attr_val)
 177         raise errors.OpPrereqError("Parameter '%s.%s' fails validation" %
 178                                    (op_id, attr_name), errors.ECODE_INVAL)
 179
 180     self.CheckArguments()
 181
 182   def __GetSSH(self):
 183     """Returns the SshRunner object
 184
 185     """
 186     if not self.__ssh:
 187       self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
 188     return self.__ssh
 189
 190   ssh = property(fget=__GetSSH)
 191
 192   def CheckArguments(self):
 193     """Check syntactic validity for the opcode arguments.
 194
 195     This method is for doing a simple syntactic check and ensure
 196     validity of opcode parameters, without any cluster-related
 197     checks. While the same can be accomplished in ExpandNames and/or
 198     CheckPrereq, doing these separate is better because:
 199
 200       - ExpandNames is left as as purely a lock-related function
 201       - CheckPrereq is run after we have acquired locks (and possible
 202         waited for them)
 203
 204     The function is allowed to change the self.op attribute so that
 205     later methods can no longer worry about missing parameters.
 206
 207     """
 208     pass
 209
 210   def ExpandNames(self):
 211     """Expand names for this LU.
 212
 213     This method is called before starting to execute the opcode, and it should
 214     update all the parameters of the opcode to their canonical form (e.g. a
 215     short node name must be fully expanded after this method has successfully
 216     completed). This way locking, hooks, logging, ecc. can work correctly.
 217
 218     LUs which implement this method must also populate the self.needed_locks
 219     member, as a dict with lock levels as keys, and a list of needed lock names
 220     as values. Rules:
 221
 222       - use an empty dict if you don't need any lock
 223       - if you don't need any lock at a particular level omit that level
 224       - don't put anything for the BGL level
 225       - if you want all locks at a level use locking.ALL_SET as a value
 226
 227     If you need to share locks (rather than acquire them exclusively) at one
 228     level you can modify self.share_locks, setting a true value (usually 1) for
 229     that level. By default locks are not shared.
 230
 231     This function can also define a list of tasklets, which then will be
 232     executed in order instead of the usual LU-level CheckPrereq and Exec
 233     functions, if those are not defined by the LU.
 234
 235     Examples::
 236
 237       # Acquire all nodes and one instance
 238       self.needed_locks = {
 239         locking.LEVEL_NODE: locking.ALL_SET,
 240         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 241       }
 242       # Acquire just two nodes
 243       self.needed_locks = {
 244         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 245       }
 246       # Acquire no locks
 247       self.needed_locks = {} # No, you can't leave it to the default value None
 248
 249     """
 250     # The implementation of this method is mandatory only if the new LU is
 251     # concurrent, so that old LUs don't need to be changed all at the same
 252     # time.
 253     if self.REQ_BGL:
 254       self.needed_locks = {} # Exclusive LUs don't need locks.
 255     else:
 256       raise NotImplementedError
 257
  def DeclareLocks(self, level):
    """Declare the locking needs of this LU for one level.

    Most LUs can declare all their locking needs in ExpandNames, but
    sometimes the locks of one level can only be computed after the locks
    of the previous levels have been acquired. This function is called
    just before acquiring the locks of a particular level, but after
    acquiring the ones at lower levels, and may modify self.needed_locks
    accordingly. The default implementation does nothing.

    This function is only called if something is already set in
    self.needed_locks for the level.

    @type level: member of ganeti.locking.LEVELS
    @param level: locking level which is going to be locked

    """
 275
 276   def CheckPrereq(self):
 277     """Check prerequisites for this LU.
 278
 279     This method should check that the prerequisites for the execution
 280     of this LU are fulfilled. It can do internode communication, but
 281     it should be idempotent - no cluster or system changes are
 282     allowed.
 283
 284     The method should raise errors.OpPrereqError in case something is
 285     not fulfilled. Its return value is ignored.
 286
 287     This method should also update all the parameters of the opcode to
 288     their canonical form if it hasn't been done by ExpandNames before.
 289
 290     """
 291     if self.tasklets is not None:
 292       for (idx, tl) in enumerate(self.tasklets):
 293         logging.debug("Checking prerequisites for tasklet %s/%s",
 294                       idx + 1, len(self.tasklets))
 295         tl.CheckPrereq()
 296     else:
 297       pass
 298
 299   def Exec(self, feedback_fn):
 300     """Execute the LU.
 301
 302     This method should implement the actual work. It should raise
 303     errors.OpExecError for failures that are somewhat dealt with in
 304     code, or expected.
 305
 306     """
 307     if self.tasklets is not None:
 308       for (idx, tl) in enumerate(self.tasklets):
 309         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 310         tl.Exec(feedback_fn)
 311     else:
 312       raise NotImplementedError
 313
 314   def BuildHooksEnv(self):
 315     """Build hooks environment for this LU.
 316
 317     This method should return a three-node tuple consisting of: a dict
 318     containing the environment that will be used for running the
 319     specific hook for this LU, a list of node names on which the hook
 320     should run before the execution, and a list of node names on which
 321     the hook should run after the execution.
 322
 323     The keys of the dict must not have 'GANETI_' prefixed as this will
 324     be handled in the hooks runner. Also note additional keys will be
 325     added by the hooks runner. If the LU doesn't define any
 326     environment, an empty dict (and not None) should be returned.
 327
 328     No nodes should be returned as an empty list (and not None).
 329
 330     Note that if the HPATH for a LU class is None, this function will
 331     not be called.
 332
 333     """
 334     raise NotImplementedError
 335
 336   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 337     """Notify the LU about the results of its hooks.
 338
 339     This method is called every time a hooks phase is executed, and notifies
 340     the Logical Unit about the hooks' result. The LU can then use it to alter
 341     its result based on the hooks.  By default the method does nothing and the
 342     previous result is passed back unchanged but any LU can define it if it
 343     wants to use the local cluster hook-scripts somehow.
 344
 345     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 346         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 347     @param hook_results: the results of the multi-node hooks rpc call
 348     @param feedback_fn: function used send feedback back to the caller
 349     @param lu_result: the previous Exec result this LU had, or None
 350         in the PRE phase
 351     @return: the new Exec result, based on the previous result
 352         and hook results
 353
 354     """
 355     # API must be kept, thus we ignore the unused argument and could
 356     # be a function warnings
 357     # pylint: disable-msg=W0613,R0201
 358     return lu_result
 359
 360   def _ExpandAndLockInstance(self):
 361     """Helper function to expand and lock an instance.
 362
 363     Many LUs that work on an instance take its name in self.op.instance_name
 364     and need to expand it and then declare the expanded name for locking. This
 365     function does it, and then updates self.op.instance_name to the expanded
 366     name. It also initializes needed_locks as a dict, if this hasn't been done
 367     before.
 368
 369     """
 370     if self.needed_locks is None:
 371       self.needed_locks = {}
 372     else:
 373       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 374         "_ExpandAndLockInstance called with instance-level locks set"
 375     self.op.instance_name = _ExpandInstanceName(self.cfg,
 376                                                 self.op.instance_name)
 377     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 378
 379   def _LockInstancesNodes(self, primary_only=False):
 380     """Helper function to declare instances' nodes for locking.
 381
 382     This function should be called after locking one or more instances to lock
 383     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 384     with all primary or secondary nodes for instances already locked and
 385     present in self.needed_locks[locking.LEVEL_INSTANCE].
 386
 387     It should be called from DeclareLocks, and for safety only works if
 388     self.recalculate_locks[locking.LEVEL_NODE] is set.
 389
 390     In the future it may grow parameters to just lock some instance's nodes, or
 391     to just lock primaries or secondary nodes, if needed.
 392
 393     If should be called in DeclareLocks in a way similar to::
 394
 395       if level == locking.LEVEL_NODE:
 396         self._LockInstancesNodes()
 397
 398     @type primary_only: boolean
 399     @param primary_only: only lock primary nodes of locked instances
 400
 401     """
 402     assert locking.LEVEL_NODE in self.recalculate_locks, \
 403       "_LockInstancesNodes helper function called with no nodes to recalculate"
 404
 405     # TODO: check if we're really been called with the instance locks held
 406
 407     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 408     # future we might want to have different behaviors depending on the value
 409     # of self.recalculate_locks[locking.LEVEL_NODE]
 410     wanted_nodes = []
 411     for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
 412       instance = self.context.cfg.GetInstanceInfo(instance_name)
 413       wanted_nodes.append(instance.primary_node)
 414       if not primary_only:
 415         wanted_nodes.extend(instance.secondary_nodes)
 416
 417     if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
 418       self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
 419     elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
 420       self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
 421
 422     del self.recalculate_locks[locking.LEVEL_NODE]
 423
 424
 425 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
 426   """Simple LU which runs no hooks.
 427
 428   This LU is intended as a parent for other LogicalUnits which will
 429   run no hooks, in order to reduce duplicate code.
 430
 431   """
 432   HPATH = None
 433   HTYPE = None
 434
 435   def BuildHooksEnv(self):
 436     """Empty BuildHooksEnv for NoHooksLu.
 437
 438     This just raises an error.
 439
 440     """
 441     assert False, "BuildHooksEnv called for NoHooksLUs"
 442
 443
 444 class Tasklet:
 445   """Tasklet base class.
 446
 447   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 448   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 449   tasklets know nothing about locks.
 450
 451   Subclasses must follow these rules:
 452     - Implement CheckPrereq
 453     - Implement Exec
 454
 455   """
 456   def __init__(self, lu):
 457     self.lu = lu
 458
 459     # Shortcuts
 460     self.cfg = lu.cfg
 461     self.rpc = lu.rpc
 462
 463   def CheckPrereq(self):
 464     """Check prerequisites for this tasklets.
 465
 466     This method should check whether the prerequisites for the execution of
 467     this tasklet are fulfilled. It can do internode communication, but it
 468     should be idempotent - no cluster or system changes are allowed.
 469
 470     The method should raise errors.OpPrereqError in case something is not
 471     fulfilled. Its return value is ignored.
 472
 473     This method should also update all parameters to their canonical form if it
 474     hasn't been done before.
 475
 476     """
 477     pass
 478
 479   def Exec(self, feedback_fn):
 480     """Execute the tasklet.
 481
 482     This method should implement the actual work. It should raise
 483     errors.OpExecError for failures that are somewhat dealt with in code, or
 484     expected.
 485
 486     """
 487     raise NotImplementedError
 488
 489
 490 def _GetWantedNodes(lu, nodes):
 491   """Returns list of checked and expanded node names.
 492
 493   @type lu: L{LogicalUnit}
 494   @param lu: the logical unit on whose behalf we execute
 495   @type nodes: list
 496   @param nodes: list of node names or None for all nodes
 497   @rtype: list
 498   @return: the list of nodes, sorted
 499   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 500
 501   """
 502   if not nodes:
 503     raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
 504       " non-empty list of nodes whose name is to be expanded.")
 505
 506   wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
 507   return utils.NiceSort(wanted)
 508
 509
 510 def _GetWantedInstances(lu, instances):
 511   """Returns list of checked and expanded instance names.
 512
 513   @type lu: L{LogicalUnit}
 514   @param lu: the logical unit on whose behalf we execute
 515   @type instances: list
 516   @param instances: list of instance names or None for all instances
 517   @rtype: list
 518   @return: the list of instances, sorted
 519   @raise errors.OpPrereqError: if the instances parameter is wrong type
 520   @raise errors.OpPrereqError: if any of the passed instances is not found
 521
 522   """
 523   if instances:
 524     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 525   else:
 526     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 527   return wanted
 528
 529
 530 def _GetUpdatedParams(old_params, update_dict,
 531                       use_default=True, use_none=False):
 532   """Return the new version of a parameter dictionary.
 533
 534   @type old_params: dict
 535   @param old_params: old parameters
 536   @type update_dict: dict
 537   @param update_dict: dict containing new parameter values, or
 538       constants.VALUE_DEFAULT to reset the parameter to its default
 539       value
 540   @param use_default: boolean
 541   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 542       values as 'to be deleted' values
 543   @param use_none: boolean
 544   @type use_none: whether to recognise C{None} values as 'to be
 545       deleted' values
 546   @rtype: dict
 547   @return: the new parameter dictionary
 548
 549   """
 550   params_copy = copy.deepcopy(old_params)
 551   for key, val in update_dict.iteritems():
 552     if ((use_default and val == constants.VALUE_DEFAULT) or
 553         (use_none and val is None)):
 554       try:
 555         del params_copy[key]
 556       except KeyError:
 557         pass
 558     else:
 559       params_copy[key] = val
 560   return params_copy
 561
 562
 563 def _CheckOutputFields(static, dynamic, selected):
 564   """Checks whether all selected fields are valid.
 565
 566   @type static: L{utils.FieldSet}
 567   @param static: static fields set
 568   @type dynamic: L{utils.FieldSet}
 569   @param dynamic: dynamic fields set
 570
 571   """
 572   f = utils.FieldSet()
 573   f.Extend(static)
 574   f.Extend(dynamic)
 575
 576   delta = f.NonMatching(selected)
 577   if delta:
 578     raise errors.OpPrereqError("Unknown output fields selected: %s"
 579                                % ",".join(delta), errors.ECODE_INVAL)
 580
 581
 582 def _CheckGlobalHvParams(params):
 583   """Validates that given hypervisor params are not global ones.
 584
 585   This will ensure that instances don't get customised versions of
 586   global params.
 587
 588   """
 589   used_globals = constants.HVC_GLOBALS.intersection(params)
 590   if used_globals:
 591     msg = ("The following hypervisor parameters are global and cannot"
 592            " be customized at instance level, please modify them at"
 593            " cluster level: %s" % utils.CommaJoin(used_globals))
 594     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 595
 596
 597 def _CheckNodeOnline(lu, node, msg=None):
 598   """Ensure that a given node is online.
 599
 600   @param lu: the LU on behalf of which we make the check
 601   @param node: the node to check
 602   @param msg: if passed, should be a message to replace the default one
 603   @raise errors.OpPrereqError: if the node is offline
 604
 605   """
 606   if msg is None:
 607     msg = "Can't use offline node"
 608   if lu.cfg.GetNodeInfo(node).offline:
 609     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
 610
 611
 612 def _CheckNodeNotDrained(lu, node):
 613   """Ensure that a given node is not drained.
 614
 615   @param lu: the LU on behalf of which we make the check
 616   @param node: the node to check
 617   @raise errors.OpPrereqError: if the node is drained
 618
 619   """
 620   if lu.cfg.GetNodeInfo(node).drained:
 621     raise errors.OpPrereqError("Can't use drained node %s" % node,
 622                                errors.ECODE_STATE)
 623
 624
 625 def _CheckNodeVmCapable(lu, node):
 626   """Ensure that a given node is vm capable.
 627
 628   @param lu: the LU on behalf of which we make the check
 629   @param node: the node to check
 630   @raise errors.OpPrereqError: if the node is not vm capable
 631
 632   """
 633   if not lu.cfg.GetNodeInfo(node).vm_capable:
 634     raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
 635                                errors.ECODE_STATE)
 636
 637
 638 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 639   """Ensure that a node supports a given OS.
 640
 641   @param lu: the LU on behalf of which we make the check
 642   @param node: the node to check
 643   @param os_name: the OS to query about
 644   @param force_variant: whether to ignore variant errors
 645   @raise errors.OpPrereqError: if the node is not supporting the OS
 646
 647   """
 648   result = lu.rpc.call_os_get(node, os_name)
 649   result.Raise("OS '%s' not in supported OS list for node %s" %
 650                (os_name, node),
 651                prereq=True, ecode=errors.ECODE_INVAL)
 652   if not force_variant:
 653     _CheckOSVariant(result.payload, os_name)
 654
 655
 656 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
 657   """Ensure that a node has the given secondary ip.
 658
 659   @type lu: L{LogicalUnit}
 660   @param lu: the LU on behalf of which we make the check
 661   @type node: string
 662   @param node: the node to check
 663   @type secondary_ip: string
 664   @param secondary_ip: the ip to check
 665   @type prereq: boolean
 666   @param prereq: whether to throw a prerequisite or an execute error
 667   @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
 668   @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
 669
 670   """
 671   result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
 672   result.Raise("Failure checking secondary ip on node %s" % node,
 673                prereq=prereq, ecode=errors.ECODE_ENVIRON)
 674   if not result.payload:
 675     msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
 676            " please fix and re-run this command" % secondary_ip)
 677     if prereq:
 678       raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
 679     else:
 680       raise errors.OpExecError(msg)
 681
 682
 683 def _RequireFileStorage():
 684   """Checks that file storage is enabled.
 685
 686   @raise errors.OpPrereqError: when file storage is disabled
 687
 688   """
 689   if not constants.ENABLE_FILE_STORAGE:
 690     raise errors.OpPrereqError("File storage disabled at configure time",
 691                                errors.ECODE_INVAL)
 692
 693
 694 def _CheckDiskTemplate(template):
 695   """Ensure a given disk template is valid.
 696
 697   """
 698   if template not in constants.DISK_TEMPLATES:
 699     msg = ("Invalid disk template name '%s', valid templates are: %s" %
 700            (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
 701     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 702   if template == constants.DT_FILE:
 703     _RequireFileStorage()
 704   return True
 705
 706
 707 def _CheckStorageType(storage_type):
 708   """Ensure a given storage type is valid.
 709
 710   """
 711   if storage_type not in constants.VALID_STORAGE_TYPES:
 712     raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
 713                                errors.ECODE_INVAL)
 714   if storage_type == constants.ST_FILE:
 715     _RequireFileStorage()
 716   return True
 717
 718
 719 def _GetClusterDomainSecret():
 720   """Reads the cluster domain secret.
 721
 722   """
 723   return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
 724                                strict=True)
 725
 726
 727 def _CheckInstanceDown(lu, instance, reason):
 728   """Ensure that an instance is not running."""
 729   if instance.admin_up:
 730     raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
 731                                (instance.name, reason), errors.ECODE_STATE)
 732
 733   pnode = instance.primary_node
 734   ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
 735   ins_l.Raise("Can't contact node %s for instance information" % pnode,
 736               prereq=True, ecode=errors.ECODE_ENVIRON)
 737
 738   if instance.name in ins_l.payload:
 739     raise errors.OpPrereqError("Instance %s is running, %s" %
 740                                (instance.name, reason), errors.ECODE_STATE)
 741
 742
 743 def _ExpandItemName(fn, name, kind):
 744   """Expand an item name.
 745
 746   @param fn: the function to use for expansion
 747   @param name: requested item name
 748   @param kind: text description ('Node' or 'Instance')
 749   @return: the resolved (full) name
 750   @raise errors.OpPrereqError: if the item is not found
 751
 752   """
 753   full_name = fn(name)
 754   if full_name is None:
 755     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
 756                                errors.ECODE_NOENT)
 757   return full_name
 758
 759
 760 def _ExpandNodeName(cfg, name):
 761   """Wrapper over L{_ExpandItemName} for nodes."""
 762   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
 763
 764
 765 def _ExpandInstanceName(cfg, name):
 766   """Wrapper over L{_ExpandItemName} for instance."""
 767   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
 768
 769
 770 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 771                           memory, vcpus, nics, disk_template, disks,
 772                           bep, hvp, hypervisor_name):
 773   """Builds instance related env variables for hooks
 774
 775   This builds the hook environment from individual variables.
 776
 777   @type name: string
 778   @param name: the name of the instance
 779   @type primary_node: string
 780   @param primary_node: the name of the instance's primary node
 781   @type secondary_nodes: list
 782   @param secondary_nodes: list of secondary nodes as strings
 783   @type os_type: string
 784   @param os_type: the name of the instance's OS
 785   @type status: boolean
 786   @param status: the should_run status of the instance
 787   @type memory: string
 788   @param memory: the memory size of the instance
 789   @type vcpus: string
 790   @param vcpus: the count of VCPUs the instance has
 791   @type nics: list
 792   @param nics: list of tuples (ip, mac, mode, link) representing
 793       the NICs the instance has
 794   @type disk_template: string
 795   @param disk_template: the disk template of the instance
 796   @type disks: list
 797   @param disks: the list of (size, mode) pairs
 798   @type bep: dict
 799   @param bep: the backend parameters for the instance
 800   @type hvp: dict
 801   @param hvp: the hypervisor parameters for the instance
 802   @type hypervisor_name: string
 803   @param hypervisor_name: the hypervisor for the instance
 804   @rtype: dict
 805   @return: the hook environment for this instance
 806
 807   """
 808   if status:
 809     str_status = "up"
 810   else:
 811     str_status = "down"
 812   env = {
 813     "OP_TARGET": name,
 814     "INSTANCE_NAME": name,
 815     "INSTANCE_PRIMARY": primary_node,
 816     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
 817     "INSTANCE_OS_TYPE": os_type,
 818     "INSTANCE_STATUS": str_status,
 819     "INSTANCE_MEMORY": memory,
 820     "INSTANCE_VCPUS": vcpus,
 821     "INSTANCE_DISK_TEMPLATE": disk_template,
 822     "INSTANCE_HYPERVISOR": hypervisor_name,
 823   }
 824
 825   if nics:
 826     nic_count = len(nics)
 827     for idx, (ip, mac, mode, link) in enumerate(nics):
 828       if ip is None:
 829         ip = ""
 830       env["INSTANCE_NIC%d_IP" % idx] = ip
 831       env["INSTANCE_NIC%d_MAC" % idx] = mac
 832       env["INSTANCE_NIC%d_MODE" % idx] = mode
 833       env["INSTANCE_NIC%d_LINK" % idx] = link
 834       if mode == constants.NIC_MODE_BRIDGED:
 835         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
 836   else:
 837     nic_count = 0
 838
 839   env["INSTANCE_NIC_COUNT"] = nic_count
 840
 841   if disks:
 842     disk_count = len(disks)
 843     for idx, (size, mode) in enumerate(disks):
 844       env["INSTANCE_DISK%d_SIZE" % idx] = size
 845       env["INSTANCE_DISK%d_MODE" % idx] = mode
 846   else:
 847     disk_count = 0
 848
 849   env["INSTANCE_DISK_COUNT"] = disk_count
 850
 851   for source, kind in [(bep, "BE"), (hvp, "HV")]:
 852     for key, value in source.items():
 853       env["INSTANCE_%s_%s" % (kind, key)] = value
 854
 855   return env
 856
 857
 858 def _NICListToTuple(lu, nics):
 859   """Build a list of nic information tuples.
 860
 861   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
 862   value in LUQueryInstanceData.
 863
 864   @type lu:  L{LogicalUnit}
 865   @param lu: the logical unit on whose behalf we execute
 866   @type nics: list of L{objects.NIC}
 867   @param nics: list of nics to convert to hooks tuples
 868
 869   """
 870   hooks_nics = []
 871   cluster = lu.cfg.GetClusterInfo()
 872   for nic in nics:
 873     ip = nic.ip
 874     mac = nic.mac
 875     filled_params = cluster.SimpleFillNIC(nic.nicparams)
 876     mode = filled_params[constants.NIC_MODE]
 877     link = filled_params[constants.NIC_LINK]
 878     hooks_nics.append((ip, mac, mode, link))
 879   return hooks_nics
 880
 881
 882 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
 883   """Builds instance related env variables for hooks from an object.
 884
 885   @type lu: L{LogicalUnit}
 886   @param lu: the logical unit on whose behalf we execute
 887   @type instance: L{objects.Instance}
 888   @param instance: the instance for which we should build the
 889       environment
 890   @type override: dict
 891   @param override: dictionary with key/values that will override
 892       our values
 893   @rtype: dict
 894   @return: the hook environment dictionary
 895
 896   """
 897   cluster = lu.cfg.GetClusterInfo()
 898   bep = cluster.FillBE(instance)
 899   hvp = cluster.FillHV(instance)
 900   args = {
 901     'name': instance.name,
 902     'primary_node': instance.primary_node,
 903     'secondary_nodes': instance.secondary_nodes,
 904     'os_type': instance.os,
 905     'status': instance.admin_up,
 906     'memory': bep[constants.BE_MEMORY],
 907     'vcpus': bep[constants.BE_VCPUS],
 908     'nics': _NICListToTuple(lu, instance.nics),
 909     'disk_template': instance.disk_template,
 910     'disks': [(disk.size, disk.mode) for disk in instance.disks],
 911     'bep': bep,
 912     'hvp': hvp,
 913     'hypervisor_name': instance.hypervisor,
 914   }
 915   if override:
 916     args.update(override)
 917   return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
 918
 919
 920 def _AdjustCandidatePool(lu, exceptions):
 921   """Adjust the candidate pool after node operations.
 922
 923   """
 924   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
 925   if mod_list:
 926     lu.LogInfo("Promoted nodes to master candidate role: %s",
 927                utils.CommaJoin(node.name for node in mod_list))
 928     for name in mod_list:
 929       lu.context.ReaddNode(name)
 930   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 931   if mc_now > mc_max:
 932     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
 933                (mc_now, mc_max))
 934
 935
 936 def _DecideSelfPromotion(lu, exceptions=None):
 937   """Decide whether I should promote myself as a master candidate.
 938
 939   """
 940   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
 941   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 942   # the new node will increase mc_max with one, so:
 943   mc_should = min(mc_should + 1, cp_size)
 944   return mc_now < mc_should
 945
 946
 947 def _CheckNicsBridgesExist(lu, target_nics, target_node):
 948   """Check that the brigdes needed by a list of nics exist.
 949
 950   """
 951   cluster = lu.cfg.GetClusterInfo()
 952   paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
 953   brlist = [params[constants.NIC_LINK] for params in paramslist
 954             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
 955   if brlist:
 956     result = lu.rpc.call_bridges_exist(target_node, brlist)
 957     result.Raise("Error checking bridges on destination node '%s'" %
 958                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
 959
 960
 961 def _CheckInstanceBridgesExist(lu, instance, node=None):
 962   """Check that the brigdes needed by an instance exist.
 963
 964   """
 965   if node is None:
 966     node = instance.primary_node
 967   _CheckNicsBridgesExist(lu, instance.nics, node)
 968
 969
 970 def _CheckOSVariant(os_obj, name):
 971   """Check whether an OS name conforms to the os variants specification.
 972
 973   @type os_obj: L{objects.OS}
 974   @param os_obj: OS object to check
 975   @type name: string
 976   @param name: OS name passed by the user, to check for validity
 977
 978   """
 979   if not os_obj.supported_variants:
 980     return
 981   variant = objects.OS.GetVariant(name)
 982   if not variant:
 983     raise errors.OpPrereqError("OS name must include a variant",
 984                                errors.ECODE_INVAL)
 985
 986   if variant not in os_obj.supported_variants:
 987     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
 988
 989
 990 def _GetNodeInstancesInner(cfg, fn):
 991   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
 992
 993
 994 def _GetNodeInstances(cfg, node_name):
 995   """Returns a list of all primary and secondary instances on a node.
 996
 997   """
 998
 999   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1000
1001
def _GetNodePrimaryInstances(cfg, node_name):
  """Returns the instances having the given node as primary node.

  @param cfg: configuration object to query
  @param node_name: the node name compared with each instance's
      C{primary_node}

  """
  def _IsPrimary(inst):
    return node_name == inst.primary_node

  return _GetNodeInstancesInner(cfg, _IsPrimary)
1008
1009
def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns the instances having the given node as a secondary node.

  @param cfg: configuration object to query
  @param node_name: the node name looked up in each instance's
      C{secondary_nodes}

  """
  def _IsSecondary(inst):
    return node_name in inst.secondary_nodes

  return _GetNodeInstancesInner(cfg, _IsSecondary)
1016
1017
def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  @param cfg: configuration object, asked for the file storage
      directory when needed
  @param storage_type: the storage type to build arguments for
  @rtype: list
  @return: a one-element list holding the list of storage directories
      for file storage, an empty list for every other type

  """
  if storage_type != constants.ST_FILE:
    return []

  # Special case for file storage: storage.FileStorage wants a list of
  # storage directories
  return [[cfg.GetFileStorageDir()]]
1028
1029
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  """Return the indices of an instance's disks reported faulty on a node.

  @param cfg: configuration object, used to set the disk IDs for the
      given node
  @param rpc: RPC runner used to query the mirror status
  @param instance: the instance whose disks are checked
  @param node_name: the node to query
  @param prereq: passed to the RPC result's Raise call as C{prereq}
  @rtype: list
  @return: indices (positions in the RPC payload) of the entries whose
      local disk status is faulty

  """
  for disk in instance.disks:
    cfg.SetDiskID(disk, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  # entries without a status (false values) are not counted as faulty
  return [idx for (idx, status) in enumerate(result.payload)
          if status and status.ldisk_status == constants.LDS_FAULTY]
1045
1046
1047 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1048   """Check the sanity of iallocator and node arguments and use the
1049   cluster-wide iallocator if appropriate.
1050
1051   Check that at most one of (iallocator, node) is specified. If none is
1052   specified, then the LU's opcode's iallocator slot is filled with the
1053   cluster-wide default iallocator.
1054
1055   @type iallocator_slot: string
1056   @param iallocator_slot: the name of the opcode iallocator slot
1057   @type node_slot: string
1058   @param node_slot: the name of the opcode target node slot
1059
1060   """
1061   node = getattr(lu.op, node_slot, None)
1062   iallocator = getattr(lu.op, iallocator_slot, None)
1063
1064   if node is not None and iallocator is not None:
1065     raise errors.OpPrereqError("Do not specify both, iallocator and node.",
1066                                errors.ECODE_INVAL)
1067   elif node is None and iallocator is None:
1068     default_iallocator = lu.cfg.GetDefaultIAllocator()
1069     if default_iallocator:
1070       setattr(lu.op, iallocator_slot, default_iallocator)
1071     else:
1072       raise errors.OpPrereqError("No iallocator or node given and no"
1073                                  " cluster-wide default iallocator found."
1074                                  " Please specify either an iallocator or a"
1075                                  " node, or set a cluster-wide default"
1076                                  " iallocator.")
1077
1078
class LUPostInitCluster(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build the hooks environment.

    @rtype: tuple
    @return: the environment (only C{OP_TARGET}, set to the cluster
        name), an empty list and a list holding the master node

    """
    hook_env = {"OP_TARGET": self.cfg.GetClusterName()}
    master = self.cfg.GetMasterNode()
    return (hook_env, [], [master])

  def Exec(self, feedback_fn):
    """Nothing to do; this LU only exists for its hooks.

    @param feedback_fn: unused
    @return: always True

    """
    return True
1099
1100
class LUDestroyCluster(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build the hooks environment.

    @rtype: tuple
    @return: the environment (only C{OP_TARGET}, set to the cluster
        name) and two empty lists

    """
    cluster_name = self.cfg.GetClusterName()
    return ({"OP_TARGET": cluster_name}, [], [])

  def CheckPrereq(self):
    """Check prerequisites.

    The cluster must be empty: the master has to be the only node left
    and no instances may be configured.

    @raise errors.OpPrereqError: if other nodes or any instances are
        still present

    """
    master = self.cfg.GetMasterNode()

    nodes = self.cfg.GetNodeList()
    only_master_left = len(nodes) == 1 and nodes[0] == master
    if not only_master_left:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodes) - 1),
                                 errors.ECODE_INVAL)

    instances = self.cfg.GetInstanceList()
    if instances:
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % len(instances),
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    Runs the post hooks on the master, then asks the master node to
    stop its master role.

    @param feedback_fn: unused
    @return: the name of the master node

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on master node before it's removed; a failure here
    # is only logged and does not abort the operation
    hooks_master = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hooks_master.RunPhase(constants.HOOKS_PHASE_POST, [master])
    except:
      # pylint: disable-msg=W0702
      self.LogWarning("Errors occurred running hooks on %s" % master)

    stop_result = self.rpc.call_node_stop_master(master, False)
    stop_result.Raise("Could not disable the master role")

    return master
1154
1155
1156 def _VerifyCertificate(filename):
1157   """Verifies a certificate for LUVerifyCluster.
1158
1159   @type filename: string
1160   @param filename: Path to PEM file
1161
1162   """
1163   try:
1164     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1165                                            utils.ReadFile(filename))
1166   except Exception, err: # pylint: disable-msg=W0703
1167     return (LUVerifyCluster.ETYPE_ERROR,
1168             "Failed to load X509 certificate %s: %s" % (filename, err))
1169
1170   (errcode, msg) = \
1171     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1172                                 constants.SSL_CERT_EXPIRATION_ERROR)
1173
1174   if msg:
1175     fnamemsg = "While verifying %s: %s" % (filename, msg)
1176   else:
1177     fnamemsg = None
1178
1179   if errcode is None:
1180     return (None, fnamemsg)
1181   elif errcode == utils.CERT_WARNING:
1182     return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1183   elif errcode == utils.CERT_ERROR:
1184     return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1185
1186   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1187
1188
1189 class LUVerifyCluster(LogicalUnit):
1190   """Verifies the cluster status.
1191
1192   """
1193   HPATH = "cluster-verify"
1194   HTYPE = constants.HTYPE_CLUSTER
1195   _OP_PARAMS = [
1196     ("skip_checks", ht.EmptyList,
1197      ht.TListOf(ht.TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
1198     ("verbose", False, ht.TBool),
1199     ("error_codes", False, ht.TBool),
1200     ("debug_simulate_errors", False, ht.TBool),
1201     ]
1202   REQ_BGL = False
1203
1204   TCLUSTER = "cluster"
1205   TNODE = "node"
1206   TINSTANCE = "instance"
1207
1208   ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1209   ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1210   EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1211   EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1212   EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1213   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1214   EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1215   EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1216   ENODEDRBD = (TNODE, "ENODEDRBD")
1217   ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1218   ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1219   ENODEHOOKS = (TNODE, "ENODEHOOKS")
1220   ENODEHV = (TNODE, "ENODEHV")
1221   ENODELVM = (TNODE, "ENODELVM")
1222   ENODEN1 = (TNODE, "ENODEN1")
1223   ENODENET = (TNODE, "ENODENET")
1224   ENODEOS = (TNODE, "ENODEOS")
1225   ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1226   ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1227   ENODERPC = (TNODE, "ENODERPC")
1228   ENODESSH = (TNODE, "ENODESSH")
1229   ENODEVERSION = (TNODE, "ENODEVERSION")
1230   ENODESETUP = (TNODE, "ENODESETUP")
1231   ENODETIME = (TNODE, "ENODETIME")
1232
1233   ETYPE_FIELD = "code"
1234   ETYPE_ERROR = "ERROR"
1235   ETYPE_WARNING = "WARNING"
1236
1237   class NodeImage(object):
1238     """A class representing the logical and physical status of a node.
1239
1240     @type name: string
1241     @ivar name: the node name to which this object refers
1242     @ivar volumes: a structure as returned from
1243         L{ganeti.backend.GetVolumeList} (runtime)
1244     @ivar instances: a list of running instances (runtime)
1245     @ivar pinst: list of configured primary instances (config)
1246     @ivar sinst: list of configured secondary instances (config)
1247     @ivar sbp: diction of {secondary-node: list of instances} of all peers
1248         of this node (config)
1249     @ivar mfree: free memory, as reported by hypervisor (runtime)
1250     @ivar dfree: free disk, as reported by the node (runtime)
1251     @ivar offline: the offline status (config)
1252     @type rpc_fail: boolean
1253     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1254         not whether the individual keys were correct) (runtime)
1255     @type lvm_fail: boolean
1256     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1257     @type hyp_fail: boolean
1258     @ivar hyp_fail: whether the RPC call didn't return the instance list
1259     @type ghost: boolean
1260     @ivar ghost: whether this is a known node or not (config)
1261     @type os_fail: boolean
1262     @ivar os_fail: whether the RPC call didn't return valid OS data
1263     @type oslist: list
1264     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1265     @type vm_capable: boolean
1266     @ivar vm_capable: whether the node can host instances
1267
1268     """
1269     def __init__(self, offline=False, name=None, vm_capable=True):
1270       self.name = name
1271       self.volumes = {}
1272       self.instances = []
1273       self.pinst = []
1274       self.sinst = []
1275       self.sbp = {}
1276       self.mfree = 0
1277       self.dfree = 0
1278       self.offline = offline
1279       self.vm_capable = vm_capable
1280       self.rpc_fail = False
1281       self.lvm_fail = False
1282       self.hyp_fail = False
1283       self.ghost = False
1284       self.os_fail = False
1285       self.oslist = {}
1286
1287   def ExpandNames(self):
1288     self.needed_locks = {
1289       locking.LEVEL_NODE: locking.ALL_SET,
1290       locking.LEVEL_INSTANCE: locking.ALL_SET,
1291     }
1292     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1293
1294   def _Error(self, ecode, item, msg, *args, **kwargs):
1295     """Format an error message.
1296
1297     Based on the opcode's error_codes parameter, either format a
1298     parseable error code, or a simpler error string.
1299
1300     This must be called only from Exec and functions called from Exec.
1301
1302     """
1303     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1304     itype, etxt = ecode
1305     # first complete the msg
1306     if args:
1307       msg = msg % args
1308     # then format the whole message
1309     if self.op.error_codes:
1310       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1311     else:
1312       if item:
1313         item = " " + item
1314       else:
1315         item = ""
1316       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1317     # and finally report it via the feedback_fn
1318     self._feedback_fn("  - %s" % msg)
1319
1320   def _ErrorIf(self, cond, *args, **kwargs):
1321     """Log an error message if the passed condition is True.
1322
1323     """
1324     cond = bool(cond) or self.op.debug_simulate_errors
1325     if cond:
1326       self._Error(*args, **kwargs)
1327     # do not mark the operation as failed for WARN cases only
1328     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1329       self.bad = self.bad or cond
1330
1331   def _VerifyNode(self, ninfo, nresult):
1332     """Perform some basic validation on data returned from a node.
1333
1334       - check the result data structure is well formed and has all the
1335         mandatory fields
1336       - check ganeti version
1337
1338     @type ninfo: L{objects.Node}
1339     @param ninfo: the node to check
1340     @param nresult: the results from the node
1341     @rtype: boolean
1342     @return: whether overall this call was successful (and we can expect
1343          reasonable values in the respose)
1344
1345     """
1346     node = ninfo.name
1347     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1348
1349     # main result, nresult should be a non-empty dict
1350     test = not nresult or not isinstance(nresult, dict)
1351     _ErrorIf(test, self.ENODERPC, node,
1352                   "unable to verify node: no data returned")
1353     if test:
1354       return False
1355
1356     # compares ganeti version
1357     local_version = constants.PROTOCOL_VERSION
1358     remote_version = nresult.get("version", None)
1359     test = not (remote_version and
1360                 isinstance(remote_version, (list, tuple)) and
1361                 len(remote_version) == 2)
1362     _ErrorIf(test, self.ENODERPC, node,
1363              "connection to node returned invalid data")
1364     if test:
1365       return False
1366
1367     test = local_version != remote_version[0]
1368     _ErrorIf(test, self.ENODEVERSION, node,
1369              "incompatible protocol versions: master %s,"
1370              " node %s", local_version, remote_version[0])
1371     if test:
1372       return False
1373
1374     # node seems compatible, we can actually try to look into its results
1375
1376     # full package version
1377     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1378                   self.ENODEVERSION, node,
1379                   "software version mismatch: master %s, node %s",
1380                   constants.RELEASE_VERSION, remote_version[1],
1381                   code=self.ETYPE_WARNING)
1382
1383     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1384     if ninfo.vm_capable and isinstance(hyp_result, dict):
1385       for hv_name, hv_result in hyp_result.iteritems():
1386         test = hv_result is not None
1387         _ErrorIf(test, self.ENODEHV, node,
1388                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1389
1390     test = nresult.get(constants.NV_NODESETUP,
1391                            ["Missing NODESETUP results"])
1392     _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1393              "; ".join(test))
1394
1395     return True
1396
1397   def _VerifyNodeTime(self, ninfo, nresult,
1398                       nvinfo_starttime, nvinfo_endtime):
1399     """Check the node time.
1400
1401     @type ninfo: L{objects.Node}
1402     @param ninfo: the node to check
1403     @param nresult: the remote results for the node
1404     @param nvinfo_starttime: the start time of the RPC call
1405     @param nvinfo_endtime: the end time of the RPC call
1406
1407     """
1408     node = ninfo.name
1409     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1410
1411     ntime = nresult.get(constants.NV_TIME, None)
1412     try:
1413       ntime_merged = utils.MergeTime(ntime)
1414     except (ValueError, TypeError):
1415       _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1416       return
1417
1418     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1419       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1420     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1421       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1422     else:
1423       ntime_diff = None
1424
1425     _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1426              "Node time diverges by at least %s from master node time",
1427              ntime_diff)
1428
1429   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1430     """Check the node time.
1431
1432     @type ninfo: L{objects.Node}
1433     @param ninfo: the node to check
1434     @param nresult: the remote results for the node
1435     @param vg_name: the configured VG name
1436
1437     """
1438     if vg_name is None:
1439       return
1440
1441     node = ninfo.name
1442     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1443
1444     # checks vg existence and size > 20G
1445     vglist = nresult.get(constants.NV_VGLIST, None)
1446     test = not vglist
1447     _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1448     if not test:
1449       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1450                                             constants.MIN_VG_SIZE)
1451       _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1452
1453     # check pv names
1454     pvlist = nresult.get(constants.NV_PVLIST, None)
1455     test = pvlist is None
1456     _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1457     if not test:
1458       # check that ':' is not present in PV names, since it's a
1459       # special character for lvcreate (denotes the range of PEs to
1460       # use on the PV)
1461       for _, pvname, owner_vg in pvlist:
1462         test = ":" in pvname
1463         _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1464                  " '%s' of VG '%s'", pvname, owner_vg)
1465
1466   def _VerifyNodeNetwork(self, ninfo, nresult):
1467     """Check the node time.
1468
1469     @type ninfo: L{objects.Node}
1470     @param ninfo: the node to check
1471     @param nresult: the remote results for the node
1472
1473     """
1474     node = ninfo.name
1475     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1476
1477     test = constants.NV_NODELIST not in nresult
1478     _ErrorIf(test, self.ENODESSH, node,
1479              "node hasn't returned node ssh connectivity data")
1480     if not test:
1481       if nresult[constants.NV_NODELIST]:
1482         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1483           _ErrorIf(True, self.ENODESSH, node,
1484                    "ssh communication with node '%s': %s", a_node, a_msg)
1485
1486     test = constants.NV_NODENETTEST not in nresult
1487     _ErrorIf(test, self.ENODENET, node,
1488              "node hasn't returned node tcp connectivity data")
1489     if not test:
1490       if nresult[constants.NV_NODENETTEST]:
1491         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1492         for anode in nlist:
1493           _ErrorIf(True, self.ENODENET, node,
1494                    "tcp communication with node '%s': %s",
1495                    anode, nresult[constants.NV_NODENETTEST][anode])
1496
1497     test = constants.NV_MASTERIP not in nresult
1498     _ErrorIf(test, self.ENODENET, node,
1499              "node hasn't returned node master IP reachability data")
1500     if not test:
1501       if not nresult[constants.NV_MASTERIP]:
1502         if node == self.master_node:
1503           msg = "the master node cannot reach the master IP (not configured?)"
1504         else:
1505           msg = "cannot reach the master IP"
1506         _ErrorIf(True, self.ENODENET, node, msg)
1507
1508   def _VerifyInstance(self, instance, instanceconfig, node_image,
1509                       diskstatus):
1510     """Verify an instance.
1511
1512     This function checks to see if the required block devices are
1513     available on the instance's node.
1514
1515     """
1516     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1517     node_current = instanceconfig.primary_node
1518
1519     node_vol_should = {}
1520     instanceconfig.MapLVsByNode(node_vol_should)
1521
1522     for node in node_vol_should:
1523       n_img = node_image[node]
1524       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1525         # ignore missing volumes on offline or broken nodes
1526         continue
1527       for volume in node_vol_should[node]:
1528         test = volume not in n_img.volumes
1529         _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1530                  "volume %s missing on node %s", volume, node)
1531
1532     if instanceconfig.admin_up:
1533       pri_img = node_image[node_current]
1534       test = instance not in pri_img.instances and not pri_img.offline
1535       _ErrorIf(test, self.EINSTANCEDOWN, instance,
1536                "instance not running on its primary node %s",
1537                node_current)
1538
1539     for node, n_img in node_image.items():
1540       if (not node == node_current):
1541         test = instance in n_img.instances
1542         _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1543                  "instance should not run on node %s", node)
1544
1545     diskdata = [(nname, success, status, idx)
1546                 for (nname, disks) in diskstatus.items()
1547                 for idx, (success, status) in enumerate(disks)]
1548
1549     for nname, success, bdev_status, idx in diskdata:
1550       _ErrorIf(instanceconfig.admin_up and not success,
1551                self.EINSTANCEFAULTYDISK, instance,
1552                "couldn't retrieve status for disk/%s on %s: %s",
1553                idx, nname, bdev_status)
1554       _ErrorIf((instanceconfig.admin_up and success and
1555                 bdev_status.ldisk_status == constants.LDS_FAULTY),
1556                self.EINSTANCEFAULTYDISK, instance,
1557                "disk/%s on %s is faulty", idx, nname)
1558
1559   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1560     """Verify if there are any unknown volumes in the cluster.
1561
1562     The .os, .swap and backup volumes are ignored. All other volumes are
1563     reported as unknown.
1564
1565     @type reserved: L{ganeti.utils.FieldSet}
1566     @param reserved: a FieldSet of reserved volume names
1567
1568     """
1569     for node, n_img in node_image.items():
1570       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1571         # skip non-healthy nodes
1572         continue
1573       for volume in n_img.volumes:
1574         test = ((node not in node_vol_should or
1575                 volume not in node_vol_should[node]) and
1576                 not reserved.Matches(volume))
1577         self._ErrorIf(test, self.ENODEORPHANLV, node,
1578                       "volume %s is unknown", volume)
1579
1580   def _VerifyOrphanInstances(self, instancelist, node_image):
1581     """Verify the list of running instances.
1582
1583     This checks what instances are running but unknown to the cluster.
1584
1585     """
1586     for node, n_img in node_image.items():
1587       for o_inst in n_img.instances:
1588         test = o_inst not in instancelist
1589         self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1590                       "instance %s on node %s should not exist", o_inst, node)
1591
1592   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1593     """Verify N+1 Memory Resilience.
1594
1595     Check that if one single node dies we can still start all the
1596     instances it was primary for.
1597
1598     """
1599     for node, n_img in node_image.items():
1600       # This code checks that every node which is now listed as
1601       # secondary has enough memory to host all instances it is
1602       # supposed to should a single other node in the cluster fail.
1603       # FIXME: not ready for failover to an arbitrary node
1604       # FIXME: does not support file-backed instances
1605       # WARNING: we currently take into account down instances as well
1606       # as up ones, considering that even if they're down someone
1607       # might want to start them even in the event of a node failure.
1608       for prinode, instances in n_img.sbp.items():
1609         needed_mem = 0
1610         for instance in instances:
1611           bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1612           if bep[constants.BE_AUTO_BALANCE]:
1613             needed_mem += bep[constants.BE_MEMORY]
1614         test = n_img.mfree < needed_mem
1615         self._ErrorIf(test, self.ENODEN1, node,
1616                       "not enough memory on to accommodate"
1617                       " failovers should peer node %s fail", prinode)
1618
1619   def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1620                        master_files):
1621     """Verifies and computes the node required file checksums.
1622
1623     @type ninfo: L{objects.Node}
1624     @param ninfo: the node to check
1625     @param nresult: the remote results for the node
1626     @param file_list: required list of files
1627     @param local_cksum: dictionary of local files and their checksums
1628     @param master_files: list of files that only masters should have
1629
1630     """
1631     node = ninfo.name
1632     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1633
1634     remote_cksum = nresult.get(constants.NV_FILELIST, None)
1635     test = not isinstance(remote_cksum, dict)
1636     _ErrorIf(test, self.ENODEFILECHECK, node,
1637              "node hasn't returned file checksum data")
1638     if test:
1639       return
1640
1641     for file_name in file_list:
1642       node_is_mc = ninfo.master_candidate
1643       must_have = (file_name not in master_files) or node_is_mc
1644       # missing
1645       test1 = file_name not in remote_cksum
1646       # invalid checksum
1647       test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1648       # existing and good
1649       test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1650       _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1651                "file '%s' missing", file_name)
1652       _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1653                "file '%s' has wrong checksum", file_name)
1654       # not candidate and this is not a must-have file
1655       _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1656                "file '%s' should not exist on non master"
1657                " candidates (and the file is outdated)", file_name)
1658       # all good, except non-master/non-must have combination
1659       _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1660                "file '%s' should not exist"
1661                " on non master candidates", file_name)
1662
1663   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1664                       drbd_map):
1665     """Verifies and the node DRBD status.
1666
1667     @type ninfo: L{objects.Node}
1668     @param ninfo: the node to check
1669     @param nresult: the remote results for the node
1670     @param instanceinfo: the dict of instances
1671     @param drbd_helper: the configured DRBD usermode helper
1672     @param drbd_map: the DRBD map as returned by
1673         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1674
1675     """
1676     node = ninfo.name
1677     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1678
1679     if drbd_helper:
1680       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1681       test = (helper_result == None)
1682       _ErrorIf(test, self.ENODEDRBDHELPER, node,
1683                "no drbd usermode helper returned")
1684       if helper_result:
1685         status, payload = helper_result
1686         test = not status
1687         _ErrorIf(test, self.ENODEDRBDHELPER, node,
1688                  "drbd usermode helper check unsuccessful: %s", payload)
1689         test = status and (payload != drbd_helper)
1690         _ErrorIf(test, self.ENODEDRBDHELPER, node,
1691                  "wrong drbd usermode helper: %s", payload)
1692
1693     # compute the DRBD minors
1694     node_drbd = {}
1695     for minor, instance in drbd_map[node].items():
1696       test = instance not in instanceinfo
1697       _ErrorIf(test, self.ECLUSTERCFG, None,
1698                "ghost instance '%s' in temporary DRBD map", instance)
1699         # ghost instance should not be running, but otherwise we
1700         # don't give double warnings (both ghost instance and
1701         # unallocated minor in use)
1702       if test:
1703         node_drbd[minor] = (instance, False)
1704       else:
1705         instance = instanceinfo[instance]
1706         node_drbd[minor] = (instance.name, instance.admin_up)
1707
1708     # and now check them
1709     used_minors = nresult.get(constants.NV_DRBDLIST, [])
1710     test = not isinstance(used_minors, (tuple, list))
1711     _ErrorIf(test, self.ENODEDRBD, node,
1712              "cannot parse drbd status file: %s", str(used_minors))
1713     if test:
1714       # we cannot check drbd status
1715       return
1716
1717     for minor, (iname, must_exist) in node_drbd.items():
1718       test = minor not in used_minors and must_exist
1719       _ErrorIf(test, self.ENODEDRBD, node,
1720                "drbd minor %d of instance %s is not active", minor, iname)
1721     for minor in used_minors:
1722       test = minor not in node_drbd
1723       _ErrorIf(test, self.ENODEDRBD, node,
1724                "unallocated drbd minor %d is in use", minor)
1725
1726   def _UpdateNodeOS(self, ninfo, nresult, nimg):
1727     """Builds the node OS structures.
1728
1729     @type ninfo: L{objects.Node}
1730     @param ninfo: the node to check
1731     @param nresult: the remote results for the node
1732     @param nimg: the node image object
1733
1734     """
1735     node = ninfo.name
1736     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1737
1738     remote_os = nresult.get(constants.NV_OSLIST, None)
1739     test = (not isinstance(remote_os, list) or
1740             not compat.all(isinstance(v, list) and len(v) == 7
1741                            for v in remote_os))
1742
1743     _ErrorIf(test, self.ENODEOS, node,
1744              "node hasn't returned valid OS data")
1745
1746     nimg.os_fail = test
1747
1748     if test:
1749       return
1750
1751     os_dict = {}
1752
1753     for (name, os_path, status, diagnose,
1754          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1755
1756       if name not in os_dict:
1757         os_dict[name] = []
1758
1759       # parameters is a list of lists instead of list of tuples due to
1760       # JSON lacking a real tuple type, fix it:
1761       parameters = [tuple(v) for v in parameters]
1762       os_dict[name].append((os_path, status, diagnose,
1763                             set(variants), set(parameters), set(api_ver)))
1764
1765     nimg.oslist = os_dict
1766
1767   def _VerifyNodeOS(self, ninfo, nimg, base):
1768     """Verifies the node OS list.
1769
1770     @type ninfo: L{objects.Node}
1771     @param ninfo: the node to check
1772     @param nimg: the node image object
1773     @param base: the 'template' node we match against (e.g. from the master)
1774
1775     """
1776     node = ninfo.name
1777     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1778
1779     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1780
1781     for os_name, os_data in nimg.oslist.items():
1782       assert os_data, "Empty OS status for OS %s?!" % os_name
1783       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1784       _ErrorIf(not f_status, self.ENODEOS, node,
1785                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1786       _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1787                "OS '%s' has multiple entries (first one shadows the rest): %s",
1788                os_name, utils.CommaJoin([v[0] for v in os_data]))
1789       # this will catched in backend too
1790       _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1791                and not f_var, self.ENODEOS, node,
1792                "OS %s with API at least %d does not declare any variant",
1793                os_name, constants.OS_API_V15)
1794       # comparisons with the 'base' image
1795       test = os_name not in base.oslist
1796       _ErrorIf(test, self.ENODEOS, node,
1797                "Extra OS %s not present on reference node (%s)",
1798                os_name, base.name)
1799       if test:
1800         continue
1801       assert base.oslist[os_name], "Base node has empty OS status?"
1802       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1803       if not b_status:
1804         # base OS is invalid, skipping
1805         continue
1806       for kind, a, b in [("API version", f_api, b_api),
1807                          ("variants list", f_var, b_var),
1808                          ("parameters", f_param, b_param)]:
1809         _ErrorIf(a != b, self.ENODEOS, node,
1810                  "OS %s %s differs from reference node %s: %s vs. %s",
1811                  kind, os_name, base.name,
1812                  utils.CommaJoin(a), utils.CommaJoin(b))
1813
1814     # check any missing OSes
1815     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1816     _ErrorIf(missing, self.ENODEOS, node,
1817              "OSes present on reference node %s but missing on this node: %s",
1818              base.name, utils.CommaJoin(missing))
1819
1820   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1821     """Verifies and updates the node volume data.
1822
1823     This function will update a L{NodeImage}'s internal structures
1824     with data from the remote call.
1825
1826     @type ninfo: L{objects.Node}
1827     @param ninfo: the node to check
1828     @param nresult: the remote results for the node
1829     @param nimg: the node image object
1830     @param vg_name: the configured VG name
1831
1832     """
1833     node = ninfo.name
1834     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1835
1836     nimg.lvm_fail = True
1837     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1838     if vg_name is None:
1839       pass
1840     elif isinstance(lvdata, basestring):
1841       _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1842                utils.SafeEncode(lvdata))
1843     elif not isinstance(lvdata, dict):
1844       _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1845     else:
1846       nimg.volumes = lvdata
1847       nimg.lvm_fail = False
1848
1849   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1850     """Verifies and updates the node instance list.
1851
1852     If the listing was successful, then updates this node's instance
1853     list. Otherwise, it marks the RPC call as failed for the instance
1854     list key.
1855
1856     @type ninfo: L{objects.Node}
1857     @param ninfo: the node to check
1858     @param nresult: the remote results for the node
1859     @param nimg: the node image object
1860
1861     """
1862     idata = nresult.get(constants.NV_INSTANCELIST, None)
1863     test = not isinstance(idata, list)
1864     self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1865                   " (instancelist): %s", utils.SafeEncode(str(idata)))
1866     if test:
1867       nimg.hyp_fail = True
1868     else:
1869       nimg.instances = idata
1870
1871   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1872     """Verifies and computes a node information map
1873
1874     @type ninfo: L{objects.Node}
1875     @param ninfo: the node to check
1876     @param nresult: the remote results for the node
1877     @param nimg: the node image object
1878     @param vg_name: the configured VG name
1879
1880     """
1881     node = ninfo.name
1882     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1883
1884     # try to read free memory (from the hypervisor)
1885     hv_info = nresult.get(constants.NV_HVINFO, None)
1886     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1887     _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1888     if not test:
1889       try:
1890         nimg.mfree = int(hv_info["memory_free"])
1891       except (ValueError, TypeError):
1892         _ErrorIf(True, self.ENODERPC, node,
1893                  "node returned invalid nodeinfo, check hypervisor")
1894
1895     # FIXME: devise a free space model for file based instances as well
1896     if vg_name is not None:
1897       test = (constants.NV_VGLIST not in nresult or
1898               vg_name not in nresult[constants.NV_VGLIST])
1899       _ErrorIf(test, self.ENODELVM, node,
1900                "node didn't return data for the volume group '%s'"
1901                " - it is either missing or broken", vg_name)
1902       if not test:
1903         try:
1904           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1905         except (ValueError, TypeError):
1906           _ErrorIf(True, self.ENODERPC, node,
1907                    "node returned invalid LVM info, check LVM status")
1908
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
    """Gets per-disk status information for all instances.

    The disks of all primary and secondary instances of each node are
    collected, their mirror status is queried from all nodes with a
    single multi-node RPC call and the answers are regrouped by
    instance.

    @type nodelist: list of strings
    @param nodelist: Node names
    @type node_image: dict
    @param node_image: node image objects indexed by node name; their
        C{pinst} and C{sinst} attributes are read here
    @type instanceinfo: dict of (name, L{objects.Instance})
    @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(success, payload)]}}
    @return: a dictionary of per-instance dictionaries with nodes as
        keys and disk information as values; the disk information is a
        list of tuples (success, payload)

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # node name -> list of (instance name, disk object)
    node_disks = {}
    # node name -> copies of the same disks, in the same order
    node_disks_devonly = {}
    diskless_instances = set()
    diskless = constants.DT_DISKLESS

    for nname in nodelist:
      node_instances = list(itertools.chain(node_image[nname].pinst,
                                            node_image[nname].sinst))
      diskless_instances.update(inst for inst in node_instances
                                if instanceinfo[inst].disk_template == diskless)
      disks = [(inst, disk)
               for inst in node_instances
               for disk in instanceinfo[inst].disks]

      if not disks:
        # No need to collect data
        continue

      node_disks[nname] = disks

      # Creating copies as SetDiskID below will modify the objects and that can
      # lead to incorrect data returned from nodes
      devonly = [dev.Copy() for (_, dev) in disks]

      for dev in devonly:
        self.cfg.SetDiskID(dev, nname)

      node_disks_devonly[nname] = devonly

    assert len(node_disks) == len(node_disks_devonly)

    # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)

    assert len(result) == len(node_disks)

    instdisk = {}

    for (nname, nres) in result.items():
      disks = node_disks[nname]

      if nres.offline:
        # No data from this node
        data = len(disks) * [(False, "node offline")]
      else:
        msg = nres.fail_msg
        _ErrorIf(msg, self.ENODERPC, nname,
                 "while getting disk information: %s", msg)
        if msg:
          # No data from this node
          data = len(disks) * [(False, msg)]
        else:
          data = []
          for idx, i in enumerate(nres.payload):
            if isinstance(i, (tuple, list)) and len(i) == 2:
              data.append(i)
            else:
              # malformed entries are replaced by a failure status so that
              # the position of the following entries is kept
              logging.warning("Invalid result from node %s, entry %d: %s",
                              nname, idx, i)
              data.append((False, "Invalid result from the remote node"))

      # the statuses are matched to the disks purely by position
      for ((inst, _), status) in zip(disks, data):
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)

    # Add empty entries for diskless instances.
    for inst in diskless_instances:
      assert inst not in instdisk
      instdisk[inst] = {}

    # sanity checks: one two-element status per disk and node, and an
    # entry for every known instance
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
                      compat.all(isinstance(s, (tuple, list)) and
                                 len(s) == 2 for s in statuses)
                      for inst, nnames in instdisk.items()
                      for nname, statuses in nnames.items())
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"

    return instdisk
2005
2006   def BuildHooksEnv(self):
2007     """Build hooks env.
2008
2009     Cluster-Verify hooks just ran in the post phase and their failure makes
2010     the output be logged in the verify output and the verification to fail.
2011
2012     """
2013     all_nodes = self.cfg.GetNodeList()
2014     env = {
2015       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2016       }
2017     for node in self.cfg.GetAllNodesInfo().values():
2018       env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
2019
2020     return env, [], all_nodes
2021
  def Exec(self, feedback_fn):
    """Verify integrity of cluster, performing various test on nodes.

    The expected cluster state is built from the configuration, the
    runtime state is gathered from the nodes via RPC and the two are
    then compared node by node and instance by instance.

    @param feedback_fn: function used to send progress messages and
        notices back to the caller
    @return: the negation of C{self.bad}, which is reset to False at the
        start of this method (presumably set by L{_ErrorIf} whenever an
        error is reported)

    """
    self.bad = False
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    verbose = self.op.verbose
    self._feedback_fn = feedback_fn
    feedback_fn("* Verifying global settings")
    for msg in self.cfg.VerifyConfig():
      _ErrorIf(True, self.ECLUSTERCFG, None, msg)

    # Check the cluster certificates
    for cert_filename in constants.ALL_CERT_FILES:
      (errcode, msg) = _VerifyCertificate(cert_filename)
      _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)

    vg_name = self.cfg.GetVGName()
    drbd_helper = self.cfg.GetDRBDHelper()
    hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
    cluster = self.cfg.GetClusterInfo()
    nodelist = utils.NiceSort(self.cfg.GetNodeList())
    nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
    instancelist = utils.NiceSort(self.cfg.GetInstanceList())
    instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
                        for iname in instancelist)
    i_non_redundant = [] # Non redundant instances
    i_non_a_balanced = [] # Non auto-balanced instances
    n_offline = 0 # Count of offline nodes
    n_drained = 0 # Count of nodes being drained
    # filled by MapLVsByNode below, later passed to _VerifyOrphanVolumes
    node_vol_should = {}

    # FIXME: verify OS list
    # do local checksums
    master_files = [constants.CLUSTER_CONF_FILE]
    master_node = self.master_node = self.cfg.GetMasterNode()
    master_ip = self.cfg.GetMasterIP()

    file_names = ssconf.SimpleStore().GetFileList()
    file_names.extend(constants.ALL_CERT_FILES)
    file_names.extend(master_files)
    if cluster.modify_etc_hosts:
      file_names.append(constants.ETC_HOSTS)

    local_checksums = utils.FingerprintFiles(file_names)

    feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
    # parameters of the node verify RPC call, indexed by check name
    node_verify_param = {
      constants.NV_FILELIST: file_names,
      constants.NV_NODELIST: [node.name for node in nodeinfo
                              if not node.offline],
      constants.NV_HYPERVISOR: hypervisors,
      constants.NV_NODENETTEST: [(node.name, node.primary_ip,
                                  node.secondary_ip) for node in nodeinfo
                                 if not node.offline],
      constants.NV_INSTANCELIST: hypervisors,
      constants.NV_VERSION: None,
      constants.NV_HVINFO: self.cfg.GetHypervisorType(),
      constants.NV_NODESETUP: None,
      constants.NV_TIME: None,
      constants.NV_MASTERIP: (master_node, master_ip),
      constants.NV_OSLIST: None,
      constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
      }

    # the LVM and DRBD list checks are requested only if a volume group
    # is configured
    if vg_name is not None:
      node_verify_param[constants.NV_VGLIST] = None
      node_verify_param[constants.NV_LVLIST] = vg_name
      node_verify_param[constants.NV_PVLIST] = [vg_name]
      node_verify_param[constants.NV_DRBDLIST] = None

    if drbd_helper:
      node_verify_param[constants.NV_DRBDHELPER] = drbd_helper

    # Build our expected cluster state
    node_image = dict((node.name, self.NodeImage(offline=node.offline,
                                                 name=node.name,
                                                 vm_capable=node.vm_capable))
                      for node in nodeinfo)

    for instance in instancelist:
      inst_config = instanceinfo[instance]

      for nname in inst_config.all_nodes:
        if nname not in node_image:
          # ghost node
          gnode = self.NodeImage(name=nname)
          gnode.ghost = True
          node_image[nname] = gnode

      inst_config.MapLVsByNode(node_vol_should)

      pnode = inst_config.primary_node
      node_image[pnode].pinst.append(instance)

      for snode in inst_config.secondary_nodes:
        nimg = node_image[snode]
        nimg.sinst.append(instance)
        # sbp: secondary instances of this node, grouped by their primary
        if pnode not in nimg.sbp:
          nimg.sbp[pnode] = []
        nimg.sbp[pnode].append(instance)

    # At this point, we have the in-memory data structures complete,
    # except for the runtime information, which we'll gather next

    # Due to the way our RPC system works, exact response times cannot be
    # guaranteed (e.g. a broken node could run into a timeout). By keeping the
    # time before and after executing the request, we can at least have a time
    # window.
    nvinfo_starttime = time.time()
    all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
                                           self.cfg.GetClusterName())
    nvinfo_endtime = time.time()

    all_drbd_map = self.cfg.ComputeDRBDMap()

    feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
    instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)

    feedback_fn("* Verifying node status")

    # the first node with valid OS data becomes the reference for the
    # OS comparison of all following nodes
    refos_img = None

    for node_i in nodeinfo:
      node = node_i.name
      nimg = node_image[node]

      if node_i.offline:
        if verbose:
          feedback_fn("* Skipping offline node %s" % (node,))
        n_offline += 1
        continue

      # note that drained nodes are only counted if they are neither the
      # master nor a master candidate
      if node == master_node:
        ntype = "master"
      elif node_i.master_candidate:
        ntype = "master candidate"
      elif node_i.drained:
        ntype = "drained"
        n_drained += 1
      else:
        ntype = "regular"
      if verbose:
        feedback_fn("* Verifying node %s (%s)" % (node, ntype))

      msg = all_nvinfo[node].fail_msg
      _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
      if msg:
        nimg.rpc_fail = True
        continue

      nresult = all_nvinfo[node].payload

      nimg.call_ok = self._VerifyNode(node_i, nresult)
      self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
      self._VerifyNodeNetwork(node_i, nresult)
      self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
                            master_files)

      # the remaining checks are done for vm_capable nodes only
      if nimg.vm_capable:
        self._VerifyNodeLVM(node_i, nresult, vg_name)
        self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
                             all_drbd_map)

        self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
        self._UpdateNodeInstances(node_i, nresult, nimg)
        self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
        self._UpdateNodeOS(node_i, nresult, nimg)
        if not nimg.os_fail:
          if refos_img is None:
            refos_img = nimg
          self._VerifyNodeOS(node_i, nimg, refos_img)

    feedback_fn("* Verifying instance status")
    for instance in instancelist:
      if verbose:
        feedback_fn("* Verifying instance %s" % instance)
      inst_config = instanceinfo[instance]
      self._VerifyInstance(instance, inst_config, node_image,
                           instdisk[instance])
      inst_nodes_offline = []

      pnode = inst_config.primary_node
      pnode_img = node_image[pnode]
      _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
               self.ENODERPC, pnode, "instance %s, connection to"
               " primary node failed", instance)

      if pnode_img.offline:
        inst_nodes_offline.append(pnode)

      # If the instance is non-redundant we cannot survive losing its primary
      # node, so we are not N+1 compliant. On the other hand we have no disk
      # templates with more than one secondary so that situation is not well
      # supported either.
      # FIXME: does not support file-backed instances
      if not inst_config.secondary_nodes:
        i_non_redundant.append(instance)
      _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
               instance, "instance has multiple secondary nodes: %s",
               utils.CommaJoin(inst_config.secondary_nodes),
               code=self.ETYPE_WARNING)

      if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
        i_non_a_balanced.append(instance)

      for snode in inst_config.secondary_nodes:
        s_img = node_image[snode]
        _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
                 "instance %s, connection to secondary node failed", instance)

        if s_img.offline:
          inst_nodes_offline.append(snode)

      # warn that the instance lives on offline nodes
      _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
               "instance lives on offline node(s) %s",
               utils.CommaJoin(inst_nodes_offline))
      # ... or ghost/non-vm_capable nodes
      for node in inst_config.all_nodes:
        _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
                 "instance lives on ghost node %s", node)
        _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
                 instance, "instance lives on non-vm_capable node %s", node)

    feedback_fn("* Verifying orphan volumes")
    reserved = utils.FieldSet(*cluster.reserved_lvs)
    self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)

    feedback_fn("* Verifying orphan instances")
    self._VerifyOrphanInstances(instancelist, node_image)

    # the N+1 memory check can be disabled through the opcode
    if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
      feedback_fn("* Verifying N+1 Memory redundancy")
      self._VerifyNPlusOneMemory(node_image, instanceinfo)

    # the notices below are informational only, they do not change the
    # result
    feedback_fn("* Other Notes")
    if i_non_redundant:
      feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
                  % len(i_non_redundant))

    if i_non_a_balanced:
      feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
                  % len(i_non_a_balanced))

    if n_offline:
      feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)

    if n_drained:
      feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)

    return not self.bad
2274
2275   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2276     """Analyze the post-hooks' result
2277
2278     This method analyses the hook result, handles it, and sends some
2279     nicely-formatted feedback back to the user.
2280
2281     @param phase: one of L{constants.HOOKS_PHASE_POST} or
2282         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2283     @param hooks_results: the results of the multi-node hooks rpc call
2284     @param feedback_fn: function used send feedback back to the caller
2285     @param lu_result: previous Exec result
2286     @return: the new Exec result, based on the previous result
2287         and hook results
2288
2289     """
2290     # We only really run POST phase hooks, and are only interested in
2291     # their results
2292     if phase == constants.HOOKS_PHASE_POST:
2293       # Used to change hooks' output to proper indentation
2294       indent_re = re.compile('^', re.M)
2295       feedback_fn("* Hooks Results")
2296       assert hooks_results, "invalid result from hooks"
2297
2298       for node_name in hooks_results:
2299         res = hooks_results[node_name]
2300         msg = res.fail_msg
2301         test = msg and not res.offline
2302         self._ErrorIf(test, self.ENODEHOOKS, node_name,
2303                       "Communication failure in hooks execution: %s", msg)
2304         if res.offline or msg:
2305           # No need to investigate payload if node is offline or gave an error.
2306           # override manually lu_result here as _ErrorIf only
2307           # overrides self.bad
2308           lu_result = 1
2309           continue
2310         for script, hkr, output in res.payload:
2311           test = hkr == constants.HKR_FAIL
2312           self._ErrorIf(test, self.ENODEHOOKS, node_name,
2313                         "Script %s failed, output:", script)
2314           if test:
2315             output = indent_re.sub('      ', output)
2316             feedback_fn("%s" % output)
2317             lu_result = 0
2318
2319       return lu_result
2320
2321
class LUVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Requests the locks of all nodes and instances.

    All lock levels are flagged in C{share_locks}.

    """
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)
    self.needed_locks = {
      locking.LEVEL_INSTANCE: locking.ALL_SET,
      locking.LEVEL_NODE: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    Only instances that are marked up and use a network mirrored disk
    template are considered.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    res_nodes = {}
    res_instances = []
    res_missing = {}
    result = (res_nodes, res_instances, res_missing)

    vg_name = self.cfg.GetVGName()
    nodes = utils.NiceSort(self.cfg.GetNodeList())
    instances = [self.cfg.GetInstanceInfo(iname)
                 for iname in self.cfg.GetInstanceList()]

    # maps (node, volume) to the instance owning the volume
    nv_dict = {}
    for inst in instances:
      if inst.admin_up and inst.disk_template in constants.DTS_NET_MIRROR:
        lvs_by_node = {}
        inst.MapLVsByNode(lvs_by_node)
        for node, vol_list in lvs_by_node.iteritems():
          for vol in vol_list:
            nv_dict[(node, vol)] = inst

    if not nv_dict:
      # nothing to check, so the nodes are not queried at all
      return result

    node_lvs = self.rpc.call_lv_list(nodes, vg_name)

    for node in nodes:
      node_res = node_lvs[node]
      if node_res.offline:
        continue

      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      for lv_name, lv_attrs in node_res.payload.items():
        (_, _, lv_online) = lv_attrs
        # every volume found on the node is dropped from nv_dict, so
        # that only the missing ones are left at the end
        inst = nv_dict.pop((node, lv_name), None)
        if lv_online or inst is None:
          continue
        if inst.name not in res_instances:
          res_instances.append(inst.name)

    # whatever is still in nv_dict was not reported by its node; group
    # these volumes by instance name
    for key, inst in nv_dict.iteritems():
      res_missing.setdefault(inst.name, []).append(key)

    return result
2394
2395
class LURepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  The disk sizes recorded in the configuration are compared with the
  sizes reported by each instance's primary node; recorded values that
  differ are corrected in the configuration.

  """
  _OP_PARAMS = [("instances", ht.EmptyList, ht.TListOf(ht.TNonEmptyString))]
  REQ_BGL = False

  def ExpandNames(self):
    """Compute the locks needed for the requested instances.

    If instance names were given, they are expanded and only those
    instances are locked; their nodes are filled in later, in
    L{DeclareLocks}. Otherwise all instances and all nodes are locked.
    All locks are acquired in shared mode.

    """
    if self.op.instances:
      self.wanted_names = []
      for name in self.op.instances:
        full_name = _ExpandInstanceName(self.cfg, name)
        self.wanted_names.append(full_name)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      # None means "all instances"; resolved in CheckPrereq
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    self.share_locks = dict(((i, 1) for i in locking.LEVELS))

  def DeclareLocks(self, level):
    """Lock the primary nodes of the explicitly requested instances.

    @param level: the locking level being declared

    """
    if level == locking.LEVEL_NODE and self.wanted_names is not None:
      self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
                             in self.wanted_names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object
    @rtype: boolean
    @return: True if the size of any (recursively visited) first child
        was changed, False otherwise

    """
    if disk.dev_type == constants.LD_DRBD8:
      assert disk.children, "Empty children for DRBD8?"
      fchild = disk.children[0]
      mismatch = fchild.size < disk.size
      if mismatch:
        self.LogInfo("Child disk has size %d, parent %d, fixing",
                     fchild.size, disk.size)
        fchild.size = disk.size

      # and we recurse on this child only, not on the metadev
      return self._EnsureChildSizes(fchild) or mismatch
    else:
      return False

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    @param feedback_fn: function passed on to the configuration update
    @rtype: list
    @return: list of (instance name, disk index, size) tuples, one for
        each correction written to the configuration

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    # Group the disks by primary node, so that each node is queried once
    per_node_disks = {}
    for instance in self.wanted_instances:
      pnode = instance.primary_node
      if pnode not in per_node_disks:
        per_node_disks[pnode] = []
      for idx, disk in enumerate(instance.disks):
        per_node_disks[pnode].append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      # work on copies, as SetDiskID modifies the disk objects
      newl = [v[2].Copy() for v in dskl]
      for dsk in newl:
        self.cfg.SetDiskID(dsk, node)
      result = self.rpc.call_blockdev_getsizes(node, newl)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsizes call to node"
                        " %s, ignoring", node)
        continue
      # The per-disk sizes are read from the payload, like all the other
      # RPC results in this module; the answer must have one entry per
      # disk we asked about.
      if len(result.payload) != len(dskl):
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        # NOTE(review): presumably converts bytes to the mebibytes used
        # for disk.size - confirm against the node daemon
        size = size >> 20
        if size != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, size)
          disk.size = size
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, size))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed
2510
2511
class LURenameCluster(LogicalUnit):
  """Logical unit changing the cluster name (and possibly its IP).

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER
  _OP_PARAMS = [("name", ht.NoDefault, ht.TNonEmptyString)]

  def BuildHooksEnv(self):
    """Build the hooks environment and node lists.

    @return: tuple of (environment dict, list holding the master node,
        list of all nodes)

    """
    hooks_env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    master_node = self.cfg.GetMasterNode()
    return hooks_env, [master_node], self.cfg.GetNodeList()

  def CheckPrereq(self):
    """Resolve the requested name and make sure it is usable.

    The resolved name replaces C{self.op.name} and the resolved IP is
    kept in C{self.ip} for use by L{Exec}.

    @raise errors.OpPrereqError: if neither name nor IP would change, or
        if a changed IP already answers on the node daemon port

    """
    resolved = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())
    self.ip = resolved.ip

    name_changed = resolved.name != self.cfg.GetClusterName()
    ip_changed = resolved.ip != self.cfg.GetMasterIP()

    if not (name_changed or ip_changed):
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)

    # a new address must not be in use by somebody else already
    if ip_changed and netutils.TcpPing(resolved.ip,
                                       constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                 " reachable on the network" %
                                 resolved.ip, errors.ECODE_NOTUNIQUE)

    self.op.name = resolved.name

  def Exec(self, feedback_fn):
    """Apply the new cluster name and IP.

    The master role is stopped for the duration of the change and is
    started again afterwards, whether or not the change succeeded.

    @return: the new cluster name

    """
    new_name = self.op.name
    new_ip = self.ip

    # the master IP has to go down before the configuration is changed
    master_node = self.cfg.GetMasterNode()
    stop_result = self.rpc.call_node_stop_master(master_node, False)
    stop_result.Raise("Could not disable the master role")

    try:
      cluster_info = self.cfg.GetClusterInfo()
      cluster_info.cluster_name = new_name
      cluster_info.master_ip = new_ip
      self.cfg.Update(cluster_info, feedback_fn)

      # regenerate the known hosts file and push it to the other nodes
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      other_nodes = self.cfg.GetNodeList()
      if master_node in other_nodes:
        other_nodes.remove(master_node)
      _UploadHelper(self, other_nodes, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      start_result = self.rpc.call_node_start_master(master_node, False, False)
      start_error = start_result.fail_msg
      if start_error:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s",
                        start_error)

    return new_name
2589
2590
2591 class LUSetClusterParams(LogicalUnit):
2592   """Change the parameters of the cluster.
2593
2594   """
2595   HPATH = "cluster-modify"
2596   HTYPE = constants.HTYPE_CLUSTER
2597   _OP_PARAMS = [
2598     ("vg_name", None, ht.TMaybeString),
2599     ("enabled_hypervisors", None,
2600      ht.TOr(ht.TAnd(ht.TListOf(ht.TElemOf(constants.HYPER_TYPES)), ht.TTrue),
2601             ht.TNone)),
2602     ("hvparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2603                               ht.TNone)),
2604     ("beparams", None, ht.TOr(ht.TDict, ht.TNone)),
2605     ("os_hvp", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2606                             ht.TNone)),
2607     ("osparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2608                               ht.TNone)),
2609     ("candidate_pool_size", None, ht.TOr(ht.TStrictPositiveInt, ht.TNone)),
2610     ("uid_pool", None, ht.NoType),
2611     ("add_uids", None, ht.NoType),
2612     ("remove_uids", None, ht.NoType),
2613     ("maintain_node_health", None, ht.TMaybeBool),
2614     ("prealloc_wipe_disks", None, ht.TMaybeBool),
2615     ("nicparams", None, ht.TOr(ht.TDict, ht.TNone)),
2616     ("drbd_helper", None, ht.TOr(ht.TString, ht.TNone)),
2617     ("default_iallocator", None, ht.TOr(ht.TString, ht.TNone)),
2618     ("reserved_lvs", None, ht.TOr(ht.TListOf(ht.TNonEmptyString), ht.TNone)),
2619     ("hidden_os", None, ht.TOr(ht.TListOf(\
2620           ht.TAnd(ht.TList,
2621                 ht.TIsLength(2),
2622                 ht.TMap(lambda v: v[0], ht.TElemOf(constants.DDMS_VALUES)))),
2623           ht.TNone)),
2624     ("blacklisted_os", None, ht.TOr(ht.TListOf(\
2625           ht.TAnd(ht.TList,
2626                 ht.TIsLength(2),
2627                 ht.TMap(lambda v: v[0], ht.TElemOf(constants.DDMS_VALUES)))),
2628           ht.TNone)),
2629     ]
2630   REQ_BGL = False
2631
2632   def CheckArguments(self):
2633     """Check parameters
2634
2635     """
2636     if self.op.uid_pool:
2637       uidpool.CheckUidPool(self.op.uid_pool)
2638
2639     if self.op.add_uids:
2640       uidpool.CheckUidPool(self.op.add_uids)
2641
2642     if self.op.remove_uids:
2643       uidpool.CheckUidPool(self.op.remove_uids)
2644
2645   def ExpandNames(self):
2646     # FIXME: in the future maybe other cluster params won't require checking on
2647     # all nodes to be modified.
2648     self.needed_locks = {
2649       locking.LEVEL_NODE: locking.ALL_SET,
2650     }
2651     self.share_locks[locking.LEVEL_NODE] = 1
2652
2653   def BuildHooksEnv(self):
2654     """Build hooks env.
2655
2656     """
2657     env = {
2658       "OP_TARGET": self.cfg.GetClusterName(),
2659       "NEW_VG_NAME": self.op.vg_name,
2660       }
2661     mn = self.cfg.GetMasterNode()
2662     return env, [mn], [mn]
2663
2664   def CheckPrereq(self):
2665     """Check prerequisites.
2666
2667     This checks whether the given params don't conflict and
2668     if the given volume group is valid.
2669
2670     """
2671     if self.op.vg_name is not None and not self.op.vg_name:
2672       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2673         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2674                                    " instances exist", errors.ECODE_INVAL)
2675
2676     if self.op.drbd_helper is not None and not self.op.drbd_helper:
2677       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2678         raise errors.OpPrereqError("Cannot disable drbd helper while"
2679                                    " drbd-based instances exist",
2680                                    errors.ECODE_INVAL)
2681
2682     node_list = self.acquired_locks[locking.LEVEL_NODE]
2683
2684     # if vg_name not None, checks given volume group on all nodes
2685     if self.op.vg_name:
2686       vglist = self.rpc.call_vg_list(node_list)
2687       for node in node_list:
2688         msg = vglist[node].fail_msg
2689         if msg:
2690           # ignoring down node
2691           self.LogWarning("Error while gathering data on node %s"
2692                           " (ignoring node): %s", node, msg)
2693           continue
2694         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2695                                               self.op.vg_name,
2696                                               constants.MIN_VG_SIZE)
2697         if vgstatus:
2698           raise errors.OpPrereqError("Error on node '%s': %s" %
2699                                      (node, vgstatus), errors.ECODE_ENVIRON)
2700
2701     if self.op.drbd_helper:
2702       # checks given drbd helper on all nodes
2703       helpers = self.rpc.call_drbd_helper(node_list)
2704       for node in node_list:
2705         ninfo = self.cfg.GetNodeInfo(node)
2706         if ninfo.offline:
2707           self.LogInfo("Not checking drbd helper on offline node %s", node)
2708           continue
2709         msg = helpers[node].fail_msg
2710         if msg:
2711           raise errors.OpPrereqError("Error checking drbd helper on node"
2712                                      " '%s': %s" % (node, msg),
2713                                      errors.ECODE_ENVIRON)
2714         node_helper = helpers[node].payload
2715         if node_helper != self.op.drbd_helper:
2716           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2717                                      (node, node_helper), errors.ECODE_ENVIRON)
2718
2719     self.cluster = cluster = self.cfg.GetClusterInfo()
2720     # validate params changes
2721     if self.op.beparams:
2722       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2723       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2724
2725     if self.op.nicparams:
2726       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2727       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2728       objects.NIC.CheckParameterSyntax(self.new_nicparams)
2729       nic_errors = []
2730
2731       # check all instances for consistency
2732       for instance in self.cfg.GetAllInstancesInfo().values():
2733         for nic_idx, nic in enumerate(instance.nics):
2734           params_copy = copy.deepcopy(nic.nicparams)
2735           params_filled = objects.FillDict(self.new_nicparams, params_copy)
2736
2737           # check parameter syntax
2738           try:
2739             objects.NIC.CheckParameterSyntax(params_filled)
2740           except errors.ConfigurationError, err:
2741             nic_errors.append("Instance %s, nic/%d: %s" %
2742                               (instance.name, nic_idx, err))
2743
2744           # if we're moving instances to routed, check that they have an ip
2745           target_mode = params_filled[constants.NIC_MODE]
2746           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2747             nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2748                               (instance.name, nic_idx))
2749       if nic_errors:
2750         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2751                                    "\n".join(nic_errors))
2752
2753     # hypervisor list/parameters
2754     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2755     if self.op.hvparams:
2756       for hv_name, hv_dict in self.op.hvparams.items():
2757         if hv_name not in self.new_hvparams:
2758           self.new_hvparams[hv_name] = hv_dict
2759         else:
2760           self.new_hvparams[hv_name].update(hv_dict)
2761
2762     # os hypervisor parameters
2763     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2764     if self.op.os_hvp:
2765       for os_name, hvs in self.op.os_hvp.items():
2766         if os_name not in self.new_os_hvp:
2767           self.new_os_hvp[os_name] = hvs
2768         else:
2769           for hv_name, hv_dict in hvs.items():
2770             if hv_name not in self.new_os_hvp[os_name]:
2771               self.new_os_hvp[os_name][hv_name] = hv_dict
2772             else:
2773               self.new_os_hvp[os_name][hv_name].update(hv_dict)
2774
2775     # os parameters
2776     self.new_osp = objects.FillDict(cluster.osparams, {})
2777     if self.op.osparams:
2778       for os_name, osp in self.op.osparams.items():
2779         if os_name not in self.new_osp:
2780           self.new_osp[os_name] = {}
2781
2782         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2783                                                   use_none=True)
2784
2785         if not self.new_osp[os_name]:
2786           # we removed all parameters
2787           del self.new_osp[os_name]
2788         else:
2789           # check the parameter validity (remote check)
2790           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2791                          os_name, self.new_osp[os_name])
2792
2793     # changes to the hypervisor list
2794     if self.op.enabled_hypervisors is not None:
2795       self.hv_list = self.op.enabled_hypervisors
2796       for hv in self.hv_list:
2797         # if the hypervisor doesn't already exist in the cluster
2798         # hvparams, we initialize it to empty, and then (in both
2799         # cases) we make sure to fill the defaults, as we might not
2800         # have a complete defaults list if the hypervisor wasn't
2801         # enabled before
2802         if hv not in new_hvp:
2803           new_hvp[hv] = {}
2804         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2805         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2806     else:
2807       self.hv_list = cluster.enabled_hypervisors
2808
2809     if self.op.hvparams or self.op.enabled_hypervisors is not None:
2810       # either the enabled list has changed, or the parameters have, validate
2811       for hv_name, hv_params in self.new_hvparams.items():
2812         if ((self.op.hvparams and hv_name in self.op.hvparams) or
2813             (self.op.enabled_hypervisors and
2814              hv_name in self.op.enabled_hypervisors)):
2815           # either this is a new hypervisor, or its parameters have changed
2816           hv_class = hypervisor.GetHypervisor(hv_name)
2817           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2818           hv_class.CheckParameterSyntax(hv_params)
2819           _CheckHVParams(self, node_list, hv_name, hv_params)
2820
2821     if self.op.os_hvp:
2822       # no need to check any newly-enabled hypervisors, since the
2823       # defaults have already been checked in the above code-block
2824       for os_name, os_hvp in self.new_os_hvp.items():
2825         for hv_name, hv_params in os_hvp.items():
2826           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2827           # we need to fill in the new os_hvp on top of the actual hv_p
2828           cluster_defaults = self.new_hvparams.get(hv_name, {})
2829           new_osp = objects.FillDict(cluster_defaults, hv_params)
2830           hv_class = hypervisor.GetHypervisor(hv_name)
2831           hv_class.CheckParameterSyntax(new_osp)
2832           _CheckHVParams(self, node_list, hv_name, new_osp)
2833
2834     if self.op.default_iallocator:
2835       alloc_script = utils.FindFile(self.op.default_iallocator,
2836                                     constants.IALLOCATOR_SEARCH_PATH,
2837                                     os.path.isfile)
2838       if alloc_script is None:
2839         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2840                                    " specified" % self.op.default_iallocator,
2841                                    errors.ECODE_INVAL)
2842
2843   def Exec(self, feedback_fn):
2844     """Change the parameters of the cluster.
2845
2846     """
2847     if self.op.vg_name is not None:
2848       new_volume = self.op.vg_name
2849       if not new_volume:
2850         new_volume = None
2851       if new_volume != self.cfg.GetVGName():
2852         self.cfg.SetVGName(new_volume)
2853       else:
2854         feedback_fn("Cluster LVM configuration already in desired"
2855                     " state, not changing")
2856     if self.op.drbd_helper is not None:
2857       new_helper = self.op.drbd_helper
2858       if not new_helper:
2859         new_helper = None
2860       if new_helper != self.cfg.GetDRBDHelper():
2861         self.cfg.SetDRBDHelper(new_helper)
2862       else:
2863         feedback_fn("Cluster DRBD helper already in desired state,"
2864                     " not changing")
2865     if self.op.hvparams:
2866       self.cluster.hvparams = self.new_hvparams
2867     if self.op.os_hvp:
2868       self.cluster.os_hvp = self.new_os_hvp
2869     if self.op.enabled_hypervisors is not None:
2870       self.cluster.hvparams = self.new_hvparams
2871       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2872     if self.op.beparams:
2873       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2874     if self.op.nicparams:
2875       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2876     if self.op.osparams:
2877       self.cluster.osparams = self.new_osp
2878
2879     if self.op.candidate_pool_size is not None:
2880       self.cluster.candidate_pool_size = self.op.candidate_pool_size
2881       # we need to update the pool size here, otherwise the save will fail
2882       _AdjustCandidatePool(self, [])
2883
2884     if self.op.maintain_node_health is not None:
2885       self.cluster.maintain_node_health = self.op.maintain_node_health
2886
2887     if self.op.prealloc_wipe_disks is not None:
2888       self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
2889
2890     if self.op.add_uids is not None:
2891       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2892
2893     if self.op.remove_uids is not None:
2894       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2895
2896     if self.op.uid_pool is not None:
2897       self.cluster.uid_pool = self.op.uid_pool
2898
2899     if self.op.default_iallocator is not None:
2900       self.cluster.default_iallocator = self.op.default_iallocator
2901
2902     if self.op.reserved_lvs is not None:
2903       self.cluster.reserved_lvs = self.op.reserved_lvs
2904
2905     def helper_os(aname, mods, desc):
2906       desc += " OS list"
2907       lst = getattr(self.cluster, aname)
2908       for key, val in mods:
2909         if key == constants.DDM_ADD:
2910           if val in lst:
2911             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
2912           else:
2913             lst.append(val)
2914         elif key == constants.DDM_REMOVE:
2915           if val in lst:
2916             lst.remove(val)
2917           else:
2918             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
2919         else:
2920           raise errors.ProgrammerError("Invalid modification '%s'" % key)
2921
2922     if self.op.hidden_os:
2923       helper_os("hidden_os", self.op.hidden_os, "hidden")
2924
2925     if self.op.blacklisted_os:
2926       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
2927
2928     self.cfg.Update(self.cluster, feedback_fn)
2929
2930
def _UploadHelper(lu, nodes, fname):
  """Upload a file to the given nodes, warning about every failed copy.

  Nothing is done if the file does not exist locally.

  @param lu: calling logical unit
  @param nodes: list of node names to upload to
  @param fname: path of the file to upload

  """
  if not os.path.exists(fname):
    return

  upload_results = lu.rpc.call_upload_file(nodes, fname)
  for node_name, node_result in upload_results.items():
    failure = node_result.fail_msg
    if not failure:
      continue
    lu.proc.LogWarning("Copy of file %s to node %s failed: %s" %
                       (fname, node_name, failure))
2943
2944
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Copy the extra cluster files to all the nodes that need them.

  The config and ssconf files are distributed by ConfigWriter; this
  function handles the remaining files which have to be present on the
  nodes. The master node itself is never a target.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  # 1. Work out the target nodes: all online nodes get the generic
  # files, only vm-capable ones get the hypervisor files
  master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
  all_targets = lu.cfg.GetOnlineNodeList()
  non_vm_capable = lu.cfg.GetNonVmCapableNodeList()

  vm_targets = []
  for node_name in all_targets:
    if node_name not in non_vm_capable:
      vm_targets.append(node_name)

  if additional_nodes is not None:
    all_targets.extend(additional_nodes)
    if additional_vm:
      vm_targets.extend(additional_nodes)

  for targets in (all_targets, vm_targets):
    if master_info.name in targets:
      targets.remove(master_info.name)

  # 2. Work out the files to copy
  generic_files = set([constants.ETC_HOSTS,
                       constants.SSH_KNOWN_HOSTS_FILE,
                       constants.RAPI_CERT_FILE,
                       constants.RAPI_USERS_FILE,
                       constants.CONFD_HMAC_KEY,
                       constants.CLUSTER_DOMAIN_SECRET_FILE,
                      ])

  hv_files = set()
  for hv_name in lu.cfg.GetClusterInfo().enabled_hypervisors:
    hv_files.update(hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())

  # 3. Upload, generic files first
  for targets, file_names in ((all_targets, generic_files),
                              (vm_targets, hv_files)):
    for fname in file_names:
      _UploadHelper(lu, targets, fname)
2992
2993
class LURedistributeConfig(NoHooksLU):
  """Logical unit pushing the cluster configuration to the nodes again.

  It needs no parameters and does no prerequisite checks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare a shared lock on all nodes.

    """
    self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Write the configuration again and re-send the ancillary files.

    @param feedback_fn: function passed on to the configuration update

    """
    cluster_info = self.cfg.GetClusterInfo()
    self.cfg.Update(cluster_info, feedback_fn)
    _RedistributeAncillaryFiles(self)
3014
3015
def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  The mirror status is queried on the instance's primary node until no
  device reports a sync in progress (or only once, if C{oneshot} is
  set). Between polls we sleep for the longest remaining time estimated
  by any device, capped at 60 seconds.

  @param lu: calling logical unit
  @param instance: the instance whose disks are checked
  @param disks: the disks to check; an empty list means nothing to do.
      NOTE(review): None presumably selects all the instance's disks,
      this is resolved by _ExpandCheckDisks - confirm there
  @param oneshot: if True, do not wait for the sync to finish
  @rtype: boolean
  @return: False if, at the last poll, some device was degraded without
      a sync being in progress; True otherwise
  @raise errors.RemoteError: if the node fails to answer ten times in
      a row

  """
  if not instance.disks or disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  retries = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    msg = rstats.fail_msg
    if msg:
      lu.LogWarning("Can't get any data from node %s: %s", node, msg)
      retries += 1
      if retries >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue
    rstats = rstats.payload
    # a successful answer resets the failure counter
    retries = 0
    for i, mstat in enumerate(rstats):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                           node, disks[i].iv_name)
        continue

      # degraded only counts if there is no sync running to repair it
      cumul_degraded = (cumul_degraded or
                        (mstat.is_degraded and mstat.sync_percent is None))
      if mstat.sync_percent is not None:
        done = False
        if mstat.estimated_time is not None:
          rem_time = ("%s remaining (estimated)" %
                      utils.FormatSeconds(mstat.estimated_time))
          # keep the longest estimate over all devices, not just the
          # estimate of the last device reporting one
          max_time = max(max_time, mstat.estimated_time)
        else:
          rem_time = "no time estimate"
        lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                        (disks[i].iv_name, mstat.sync_percent, rem_time))

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if (done or oneshot) and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if done or oneshot:
      break

    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded
3089
3090
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Verify that a disk and its children are in a healthy state.

  By default the is_degraded attribute (overall non-ok status of the
  device) is tested; with ldisk set to True, the local storage status
  is tested instead. The children are always checked with the default
  test, and only as long as no problem has been found.

  @param lu: calling logical unit
  @param dev: the disk to check
  @param node: the node on which the disk is checked
  @param on_primary: whether the node is the primary one; if not, the
      device is only looked up when it is assembled on secondaries
  @param ldisk: whether to test the local disk status
  @rtype: boolean
  @return: True if no problem was found

  """
  lu.cfg.SetDiskID(dev, node)

  consistent = True

  if on_primary or dev.AssembleOnSecondary():
    found = lu.rpc.call_blockdev_find(node, dev)
    error = found.fail_msg
    if error:
      lu.LogWarning("Can't find disk on node %s: %s", node, error)
      consistent = False
    elif not found.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      consistent = False
    elif ldisk:
      consistent = found.payload.ldisk_status == constants.LDS_OKAY
    else:
      consistent = not found.payload.is_degraded

  for child in dev.children or ():
    # once a problem is known, the remaining children are not queried
    if not consistent:
      break
    consistent = _CheckDiskConsistency(lu, child, node, on_primary)

  return consistent
3123
3124
3125 class LUDiagnoseOS(NoHooksLU):
3126   """Logical unit for OS diagnose/query.
3127
3128   """
3129   _OP_PARAMS = [
3130     _POutputFields,
3131     ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
3132     ]
3133   REQ_BGL = False
3134   _HID = "hidden"
3135   _BLK = "blacklisted"
3136   _VLD = "valid"
3137   _FIELDS_STATIC = utils.FieldSet()
3138   _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
3139                                    "parameters", "api_versions", _HID, _BLK)
3140
3141   def CheckArguments(self):
3142     if self.op.names:
3143       raise errors.OpPrereqError("Selective OS query not supported",
3144                                  errors.ECODE_INVAL)
3145
3146     _CheckOutputFields(static=self._FIELDS_STATIC,
3147                        dynamic=self._FIELDS_DYNAMIC,
3148                        selected=self.op.output_fields)
3149
3150   def ExpandNames(self):
3151     # Lock all nodes, in shared mode
3152     # Temporary removal of locks, should be reverted later
3153     # TODO: reintroduce locks when they are lighter-weight
3154     self.needed_locks = {}
3155     #self.share_locks[locking.LEVEL_NODE] = 1
3156     #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3157
3158   @staticmethod
3159   def _DiagnoseByOS(rlist):
3160     """Remaps a per-node return list into an a per-os per-node dictionary
3161
3162     @param rlist: a map with node names as keys and OS objects as values
3163
3164     @rtype: dict
3165     @return: a dictionary with osnames as keys and as value another
3166         map, with nodes as keys and tuples of (path, status, diagnose,
3167         variants, parameters, api_versions) as values, eg::
3168
3169           {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3170                                      (/srv/..., False, "invalid api")],
3171                            "node2": [(/srv/..., True, "", [], [])]}
3172           }
3173
3174     """
3175     all_os = {}
3176     # we build here the list of nodes that didn't fail the RPC (at RPC
3177     # level), so that nodes with a non-responding node daemon don't
3178     # make all OSes invalid
3179     good_nodes = [node_name for node_name in rlist
3180                   if not rlist[node_name].fail_msg]
3181     for node_name, nr in rlist.items():
3182       if nr.fail_msg or not nr.payload:
3183         continue
3184       for (name, path, status, diagnose, variants,
3185            params, api_versions) in nr.payload:
3186         if name not in all_os:
3187           # build a list of nodes for this os containing empty lists
3188           # for each node in node_list
3189           all_os[name] = {}
3190           for nname in good_nodes:
3191             all_os[name][nname] = []
3192         # convert params from [name, help] to (name, help)
3193         params = [tuple(v) for v in params]
3194         all_os[name][node_name].append((path, status, diagnose,
3195                                         variants, params, api_versions))
3196     return all_os
3197
3198   def Exec(self, feedback_fn):
3199     """Compute the list of OSes.
3200
3201     """
3202     valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
3203     node_data = self.rpc.call_os_diagnose(valid_nodes)
3204     pol = self._DiagnoseByOS(node_data)
3205     output = []
3206     cluster = self.cfg.GetClusterInfo()
3207
3208     for os_name in utils.NiceSort(pol.keys()):
3209       os_data = pol[os_name]
3210       row = []
3211       valid = True
3212       (variants, params, api_versions) = null_state = (set(), set(), set())
3213       for idx, osl in enumerate(os_data.values()):
3214         valid = bool(valid and osl and osl[0][1])
3215         if not valid:
3216           (variants, params, api_versions) = null_state
3217           break
3218         node_variants, node_params, node_api = osl[0][3:6]
3219         if idx == 0: # first entry
3220           variants = set(node_variants)
3221           params = set(node_params)
3222           api_versions = set(node_api)
3223         else: # keep consistency
3224           variants.intersection_update(node_variants)
3225           params.intersection_update(node_params)
3226           api_versions.intersection_update(node_api)
3227
3228       is_hid = os_name in cluster.hidden_os
3229       is_blk = os_name in cluster.blacklisted_os
3230       if ((self._HID not in self.op.output_fields and is_hid) or
3231           (self._BLK not in self.op.output_fields and is_blk) or
3232           (self._VLD not in self.op.output_fields and not valid)):
3233         continue
3234
3235       for field in self.op.output_fields:
3236         if field == "name":
3237           val = os_name
3238         elif field == self._VLD:
3239           val = valid
3240         elif field == "node_status":
3241           # this is just a copy of the dict
3242           val = {}
3243           for node_name, nos_list in os_data.items():
3244             val[node_name] = nos_list
3245         elif field == "variants":
3246           val = utils.NiceSort(list(variants))
3247         elif field == "parameters":
3248           val = list(params)
3249         elif field == "api_versions":
3250           val = list(api_versions)
3251         elif field == self._HID:
3252           val = is_hid
3253         elif field == self._BLK:
3254           val = is_blk
3255         else:
3256           raise errors.ParameterError(field)
3257         row.append(val)
3258       output.append(row)
3259
3260     return output
3261
3262
class LURemoveNode(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [
    _PNodeName,
    ]

  def BuildHooksEnv(self):
    """Build hooks env.

    The node being removed is left out of the returned node lists, as a
    failed node would otherwise be impossible to remove.

    @rtype: tuple
    @return: (environment dict, node list, node list); the very same
        list, without the target node, is used for both positions

    """
    target = self.op.node_name
    env = {
      "OP_TARGET": target,
      "NODE_NAME": target,
      }
    remaining = self.cfg.GetNodeList()
    if target in remaining:
      remaining.remove(target)
    else:
      logging.warning("Node %s which is about to be removed not found"
                      " in the all nodes list", target)
    return env, remaining, remaining

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the (expanded) node name has an entry in the configuration
     - the node is not the master
     - no instance lists the node among its nodes (C{all_nodes})

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    node = self.cfg.GetNodeInfo(self.op.node_name)
    assert node is not None

    all_instances = self.cfg.GetInstanceList()

    if node.name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Node is the master node,"
                                 " you need to failover first.",
                                 errors.ECODE_INVAL)

    for iname in all_instances:
      if node.name in self.cfg.GetInstanceInfo(iname).all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first." % iname,
                                   errors.ECODE_INVAL)

    # remember the node object for Exec
    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    Hook and leave-cluster failures are only logged as warnings; a
    failure to update the hosts file on the master is raised.

    @param feedback_fn: not used by this method

    """
    node_name = self.node.name
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node_name)

    # read before the node is dropped from the configuration
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node_name])
    self.context.RemoveNode(node_name)

    # Run post hooks on the node before it's removed
    hooks_master = self.proc.hmclass(self.rpc.call_hooks_runner, self)
    try:
      hooks_master.RunPhase(constants.HOOKS_PHASE_POST, [node_name])
    except:
      # pylint: disable-msg=W0702
      # best effort: hook failures must not abort the removal
      self.LogWarning("Errors occurred running hooks on %s" % node_name)

    leave_result = self.rpc.call_node_leave_cluster(node_name,
                                                    modify_ssh_setup)
    leave_msg = leave_result.fail_msg
    if leave_msg:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", leave_msg)

    # Remove node from our /etc/hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      hosts_result = self.rpc.call_etc_hosts_modify(self.cfg.GetMasterNode(),
                                                    constants.ETC_HOSTS_REMOVE,
                                                    node_name, None)
      hosts_result.Raise("Can't update hosts file with new host data")
      _RedistributeAncillaryFiles(self)
3360
3361
class LUQueryNodes(NoHooksLU):
  """Logical unit for querying nodes.

  """
  # pylint: disable-msg=W0142
  _OP_PARAMS = [
    _POutputFields,
    ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("use_locking", False, ht.TBool),
    ]
  REQ_BGL = False

  # fields read directly as attributes of the node object
  _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
                    "master_candidate", "offline", "drained",
                    "master_capable", "vm_capable"]

  # fields filled from the node_info RPC call
  _FIELDS_DYNAMIC = utils.FieldSet(
    "dtotal", "dfree",
    "mtotal", "mnode", "mfree",
    "bootid",
    "ctotal", "cnodes", "csockets",
    )

  _FIELDS_STATIC = utils.FieldSet(
    "pinst_cnt", "sinst_cnt",
    "pinst_list", "sinst_list",
    "pip", "sip", "tags",
    "master",
    "role",
    *_SIMPLE_FIELDS)

  def CheckArguments(self):
    """Validates the requested output fields.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Computes the wanted nodes and whether node locks are needed.

    Node locks (shared) are only requested when a non-static field was
    selected and the opcode asks for locking.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedNodes(self, self.op.names)
    else:
      self.wanted = locking.ALL_SET

    # non-empty if at least one selected field is not static
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      # if we don't request only static fields, we need to lock the nodes
      self.needed_locks[locking.LEVEL_NODE] = self.wanted

  def Exec(self, feedback_fn):
    """Computes the list of nodes and their attributes.

    @param feedback_fn: not used by this method
    @rtype: list
    @return: one list per node, in NiceSort order of the node names,
        holding the values of the requested output fields
    @raise errors.OpExecError: if, when not locking, an explicitly
        requested node is missing from the configuration
    @raise errors.ParameterError: if an unknown output field is requested

    """
    all_info = self.cfg.GetAllNodesInfo()
    if self.do_locking:
      nodenames = self.acquired_locks[locking.LEVEL_NODE]
    elif self.wanted == locking.ALL_SET:
      nodenames = all_info.keys()
    else:
      nodenames = self.wanted
      missing = set(nodenames).difference(all_info.keys())
      if missing:
        raise errors.OpExecError(
          "Some nodes were removed before retrieving their data: %s" % missing)

    nodenames = utils.NiceSort(nodenames)
    nodelist = [all_info[name] for name in nodenames]

    # begin data gathering

    if self.do_node_query:
      # output field name -> key in the RPC payload (integer values)
      int_fields = [
        ("mtotal", 'memory_total'),
        ("mnode", 'memory_dom0'),
        ("mfree", 'memory_free'),
        ("dtotal", 'vg_size'),
        ("dfree", 'vg_free'),
        ("ctotal", 'cpu_total'),
        ("cnodes", 'cpu_nodes'),
        ("csockets", 'cpu_sockets'),
        ]
      node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
                                          self.cfg.GetHypervisorType())
      live_data = {}
      for name in nodenames:
        nres = node_data[name]
        node_live = {}
        # failed or empty answers leave the node without live data
        if not nres.fail_msg and nres.payload:
          payload = nres.payload
          for (out_field, key) in int_fields:
            node_live[out_field] = utils.TryConvert(int,
                                                    payload.get(key, None))
          node_live["bootid"] = payload.get('bootid', None)
        live_data[name] = node_live
    else:
      live_data = dict([(name, {}) for name in nodenames])

    node_to_primary = {}
    node_to_secondary = {}
    for name in nodenames:
      node_to_primary[name] = set()
      node_to_secondary[name] = set()

    # the instance data is only read if an instance field was selected
    inst_fields = frozenset(("pinst_cnt", "pinst_list",
                             "sinst_cnt", "sinst_list"))
    if inst_fields.intersection(self.op.output_fields):
      for inst in self.cfg.GetAllInstancesInfo().values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    master_node = self.cfg.GetMasterNode()

    # end data gathering

    output = []
    for node in nodelist:
      row = []
      for field in self.op.output_fields:
        if field in self._SIMPLE_FIELDS:
          value = getattr(node, field)
        elif field == "pinst_list":
          value = list(node_to_primary[node.name])
        elif field == "sinst_list":
          value = list(node_to_secondary[node.name])
        elif field == "pinst_cnt":
          value = len(node_to_primary[node.name])
        elif field == "sinst_cnt":
          value = len(node_to_secondary[node.name])
        elif field == "pip":
          value = node.primary_ip
        elif field == "sip":
          value = node.secondary_ip
        elif field == "tags":
          value = list(node.GetTags())
        elif field == "master":
          value = node.name == master_node
        elif self._FIELDS_DYNAMIC.Matches(field):
          value = live_data[node.name].get(field, None)
        elif field == "role":
          # the first matching state wins: master, candidate, drained,
          # offline, regular
          if node.name == master_node:
            value = "M"
          elif node.master_candidate:
            value = "C"
          elif node.drained:
            value = "D"
          elif node.offline:
            value = "O"
          else:
            value = "R"
        else:
          raise errors.ParameterError(field)
        row.append(value)
      output.append(row)

    return output
3519
3520
class LUQueryNodeVolumes(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  _OP_PARAMS = [
    ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
    ]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    """Validates the requested output fields.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Requests shared locks on the given nodes, or on all of them.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.nodes:
      wanted = _GetWantedNodes(self, self.op.nodes)
    else:
      wanted = locking.ALL_SET
    self.needed_locks[locking.LEVEL_NODE] = wanted

  def Exec(self, feedback_fn):
    """Computes the list of volumes on the locked nodes.

    Offline nodes are skipped silently, nodes whose call failed are
    skipped with a warning. The volumes of each node are sorted by
    their 'dev' value.

    @param feedback_fn: not used by this method
    @rtype: list
    @return: one list per volume, holding the requested output fields
        converted to strings
    @raise errors.ParameterError: if an unknown output field is requested

    """
    nodenames = self.acquired_locks[locking.LEVEL_NODE]
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = []
    for iname in self.cfg.GetInstanceList():
      ilist.append(self.cfg.GetInstanceInfo(iname))

    # instance object -> result of its MapLVsByNode()
    lv_by_node = {}
    for inst in ilist:
      lv_by_node[inst] = inst.MapLVsByNode()

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
        continue

      for vol in sorted(nresult.payload, key=lambda v: v['dev']):
        row = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            # the first instance having this LV on this node; '-' if none
            val = '-'
            for inst in ilist:
              inst_lvs = lv_by_node[inst]
              if node in inst_lvs and vol['name'] in inst_lvs[node]:
                val = inst.name
                break
          else:
            raise errors.ParameterError(field)
          row.append(str(val))

        output.append(row)

    return output
3601
3602
class LUQueryNodeStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  _OP_PARAMS = [
    ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("storage_type", ht.NoDefault, _CheckStorageType),
    ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
    ("name", None, ht.TMaybeString),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    """Validates the requested output fields.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Requests shared locks on the given nodes, or on all of them.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      wanted = _GetWantedNodes(self, self.op.nodes)
    else:
      wanted = locking.ALL_SET
    self.needed_locks[locking.LEVEL_NODE] = wanted

  def Exec(self, feedback_fn):
    """Computes the list of storage units on the locked nodes.

    Nodes are handled in NiceSort order and, within a node, the rows are
    NiceSort-ed by storage unit name. Offline nodes are skipped silently,
    nodes whose call failed are skipped with a warning.

    @param feedback_fn: not used by this method
    @rtype: list
    @return: one list per storage unit, holding the requested output
        fields
    @raise errors.ParameterError: if an output field can't be resolved

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

    # Always get name to sort by
    fields = list(self.op.output_fields)
    if constants.SF_NAME not in fields:
      fields.insert(0, constants.SF_NAME)

    # Never ask for node or type as it's only known to the LU
    lu_only = (constants.SF_NODE, constants.SF_TYPE)
    fields = [fname for fname in fields if fname not in lu_only]

    # position of each requested field in the rows returned by the nodes
    field_idx = {}
    for (idx, fname) in enumerate(fields):
      field_idx[fname] = idx
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      msg = nresult.fail_msg
      if msg:
        self.LogWarning("Can't get storage data from node %s: %s", node, msg)
        continue

      # index the rows by storage unit name
      rows = {}
      for row in nresult.payload:
        rows[row[name_idx]] = row

      for name in utils.NiceSort(rows.keys()):
        row = rows[name]
        out = []
        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            out.append(node)
          elif field == constants.SF_TYPE:
            out.append(self.op.storage_type)
          elif field in field_idx:
            out.append(row[field_idx[field]])
          else:
            raise errors.ParameterError(field)
        result.append(out)

    return result
3690
3691
class LUModifyNodeStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  _OP_PARAMS = [
    _PNodeName,
    ("storage_type", ht.NoDefault, _CheckStorageType),
    ("name", ht.NoDefault, ht.TNonEmptyString),
    ("changes", ht.NoDefault, ht.TDict),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    """Expands the node name and validates the requested changes.

    @raise errors.OpPrereqError: if the storage type has no modifiable
        fields, or if a field outside the modifiable ones is to be
        changed

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    storage_type = self.op.storage_type

    if storage_type not in constants.MODIFIABLE_STORAGE_FIELDS:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % storage_type,
                                 errors.ECODE_INVAL)
    modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]

    # fields the caller wants to change but which are not modifiable
    not_allowed = set(self.op.changes.keys()) - modifiable
    if not_allowed:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (storage_type, list(not_allowed)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Requests the lock of the target node.

    """
    self.needed_locks = {
      locking.LEVEL_NODE: self.op.node_name,
      }

  def Exec(self, feedback_fn):
    """Applies the requested changes to the storage unit.

    The failure of the remote call is raised via the result's C{Raise}.

    @param feedback_fn: not used by this method

    """
    op = self.op
    st_args = _GetStorageTypeArgs(self.cfg, op.storage_type)
    result = self.rpc.call_storage_modify(op.node_name,
                                          op.storage_type, st_args,
                                          op.name, op.changes)
    result.Raise("Failed to modify storage unit '%s' on %s" %
                 (op.name, op.node_name))
3738
3739
class LUAddNode(LogicalUnit):
  """Logical unit for adding node to the cluster.

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [
    _PNodeName,
    ("primary_ip", None, ht.NoType),
    ("secondary_ip", None, ht.TMaybeString),
    ("readd", False, ht.TBool),
    ("group", None, ht.TMaybeString),
    ("master_capable", None, ht.TMaybeBool),
    ("vm_capable", None, ht.TMaybeBool),
    ]
  # node flags which are copied from the opcode to the node object
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    """Resolves the node name and checks the readd/group combination.

    @raise errors.OpPrereqError: if a node group is passed for a re-add

    """
    family = self.cfg.GetPrimaryIPFamily()
    self.primary_ip_family = family
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=family)
    self.op.node_name = self.hostname.name
    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    op = self.op
    env = {
      "OP_TARGET": op.node_name,
      "NODE_NAME": op.node_name,
      "NODE_PIP": op.primary_ip,
      "NODE_SIP": op.secondary_ip,
      "MASTER_CAPABLE": str(op.master_capable),
      "VM_CAPABLE": str(op.vm_capable),
      }
    pre_nodes = self.cfg.GetNodeList()
    post_nodes = pre_nodes + [op.node_name, ]
    return env, pre_nodes, post_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the secondary IP is a valid IPv4 address (it defaults to the
       primary IP, which is refused for an IPv6 primary family)
     - the new node is not already in the config (or, for a re-add,
       that it is)
     - its IP addresses don't conflict with the ones of other nodes
     - a node re-added as non-vm_capable holds no instances
     - its type (single/dual homed) matches the master's
     - it is reachable on the node daemon port

    It also computes the master candidate status and the node object
    used by Exec.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = hostname.ip
    self.op.primary_ip = primary_ip

    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    already_known = node in node_list
    if self.op.readd:
      if not already_known:
        raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                   errors.ECODE_NOENT)
    elif already_known:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)

    self.changed_primary_ip = False
    new_ips = (primary_ip, secondary_ip)

    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      if self.op.readd and node == existing_node_name:
        # the node's own entry: the secondary IP must be unchanged, a
        # different primary IP is only recorded
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True
        continue

      if (existing_node.primary_ip in new_ips or
          existing_node.secondary_ip in new_ips):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this block, None is no longer a valid value for the
    # _capable op attributes: unset flags are taken from the old node
    # on re-add, and default to True otherwise
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
    for attr in self._NFLAGS:
      if getattr(self.op, attr) is None:
        if self.op.readd:
          default = getattr(old_node, attr)
        else:
          default = True
        setattr(self.op, attr, default)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed and not newbie_singlehomed:
      raise errors.OpPrereqError("The master has no secondary ip but the"
                                 " new node has one",
                                 errors.ECODE_INVAL)
    if newbie_singlehomed and not master_singlehomed:
      raise errors.OpPrereqError("The master has a secondary ip but the"
                                 " new node doesn't have one",
                                 errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                              source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    exceptions = []
    if self.op.readd:
      exceptions.append(node)

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    @param feedback_fn: function used to report ssh/hostname
        verification failures
    @raise errors.OpExecError: on protocol version mismatch or if the
        ssh/hostname verification fails

    """
    new_node = self.new_node
    node = new_node.name

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      # pylint: disable-msg=W0201
      new_node.drained = False
      new_node.offline = False
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for flag in self._NFLAGS:
      setattr(new_node, flag, getattr(self.op, flag))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    # check connectivity
    version_result = self.rpc.call_version([node])[node]
    version_result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION != version_result.payload:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION,
                                version_result.payload))
    logging.info("Communication to node %s fine, sw version %s match",
                 node, version_result.payload)

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      hosts_result = self.rpc.call_etc_hosts_modify(self.cfg.GetMasterNode(),
                                                    constants.ETC_HOSTS_ADD,
                                                    self.hostname.name,
                                                    self.hostname.ip)
      hosts_result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    # have the master verify the new node
    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    verify_result = self.rpc.call_node_verify(node_verify_list,
                                              node_verify_param,
                                              self.cfg.GetClusterName())
    for verifier in node_verify_list:
      verify_result[verifier].Raise("Cannot communicate with node %s" %
                                    verifier)
      nl_payload = verify_result[verifier].payload[constants.NV_NODELIST]
      if not nl_payload:
        continue
      # report every failure before aborting
      for failed in nl_payload:
        feedback_fn("ssh/hostname verification failed"
                    " (checking from %s): %s" %
                    (verifier, nl_payload[failed]))
      raise errors.OpExecError("ssh/hostname verification failed.")

    if not self.op.readd:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())
      return

    _RedistributeAncillaryFiles(self)
    self.context.ReaddNode(new_node)
    # make sure we redistribute the config
    self.cfg.Update(new_node, feedback_fn)
    # and make sure the new node will not have old files around
    if not new_node.master_candidate:
      demote_result = self.rpc.call_node_demote_from_mc(new_node.name)
      demote_msg = demote_result.fail_msg
      if demote_msg:
        self.LogWarning("Node failed to demote itself from master"
                        " candidate status: %s" % demote_msg)
3998
3999
class LUSetNodeParams(LogicalUnit):
  """Modifies the parameters of a node.

  The node role (master candidate, drained, offline or regular) is
  handled as a single value derived from the three boolean node flags;
  the capability flags (master_capable, vm_capable) and the secondary
  IP are applied independently of the role.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [
    _PNodeName,
    ("master_candidate", None, ht.TMaybeBool),
    ("offline", None, ht.TMaybeBool),
    ("drained", None, ht.TMaybeBool),
    ("auto_promote", False, ht.TBool),
    ("master_capable", None, ht.TMaybeBool),
    ("vm_capable", None, ht.TMaybeBool),
    ("secondary_ip", None, ht.TMaybeString),
    _PForce,
    ]
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]

  def CheckArguments(self):
    """Validate the opcode arguments and decide which locks are needed.

    Sets C{self.might_demote}, C{self.lock_all} and
    C{self.lock_instances}, which are used by the locking and
    prerequisite phases.

    @raise errors.OpPrereqError: if no modification was requested, if
        more than one modification is set to True, or if the secondary
        IP is not a valid IPv4 address

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    # Note that this counts the capability flags too, not only the three
    # role flags; CheckPrereq relies on at most one role flag being True
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    # All nodes are locked only if a possible demotion may be compensated
    # by promoting other nodes
    self.lock_all = self.op.auto_promote and self.might_demote
    # Instance locks are needed whenever a secondary IP was passed
    self.lock_instances = self.op.secondary_ip is not None

  def ExpandNames(self):
    """Declare the node (and, if needed, instance) locks.

    """
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Compute the affected instances and drop unneeded instance locks.

    The affected instances are the network-mirrored ones having the
    target node among their nodes; they are stored in
    C{self.affected_instances} for use in L{CheckPrereq}.

    """
    # If we have locked all instances, before waiting to lock nodes, release
    # all the ones living on nodes unrelated to the current operation.
    if level == locking.LEVEL_NODE and self.lock_instances:
      instances_release = []
      instances_keep = []
      self.affected_instances = []
      # The affected instances must be computed even when all nodes are
      # locked, otherwise CheckPrereq would see an empty list and skip
      # its instance checks
      for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
        instance = self.context.cfg.GetInstanceInfo(instance_name)
        i_mirrored = instance.disk_template in constants.DTS_NET_MIRROR
        if i_mirrored and self.op.node_name in instance.all_nodes:
          instances_keep.append(instance_name)
          self.affected_instances.append(instance)
        else:
          instances_release.append(instance_name)
      # As before, instance locks are only released when we are not
      # locking all nodes
      if (instances_release and
          self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
        self.context.glm.release(locking.LEVEL_INSTANCE, instances_release)
        self.acquired_locks[locking.LEVEL_INSTANCE] = instances_keep

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    @return: tuple of (environment dict, pre-hook node list, post-hook
        node list); both lists contain the master and the target node

    """
    env = {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }
    nl = [self.cfg.GetMasterNode(),
          self.op.node_name]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This validates the requested changes against the current state of
    the node, computes the old and new role (stored in C{self.old_role}
    and C{self.new_role}) and, for secondary IP changes, checks the
    affected instances and the reachability of the new address.

    @raise errors.OpPrereqError: if any of the requested changes is not
        allowed in the current state

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto-promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto_promote to allow promotion",
                                   errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags  %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      if node.offline:
        if self.affected_instances:
          # Report the instance names (not the configuration objects) and
          # pass an error code, like all other prerequisite failures here
          instance_names = ", ".join([inst.name
                                      for inst in self.affected_instances])
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
                                     " node has instances (%s) configured"
                                     " to use it" % instance_names,
                                     errors.ECODE_STATE)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in self.affected_instances:
          _CheckInstanceDown(self, instance, "cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

  def Exec(self, feedback_fn):
    """Modifies a node.

    @return: list of (parameter name, new value) pairs describing the
        changes which were applied

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    # The capability flags are independent of the node role
    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      new_flags = self._R2F[new_role]
      # Report only the flags whose value actually changes
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result
4279
4280
class LUPowercycleNode(NoHooksLU):
  """Logical unit requesting the powercycle of a node.

  """
  _OP_PARAMS = [
    _PNodeName,
    _PForce,
    ]
  REQ_BGL = False

  def CheckArguments(self):
    """Expand the node name and protect the master node.

    @raise errors.OpPrereqError: if the target is the master node and
        the force parameter was not given

    """
    node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    self.op.node_name = node_name

    is_master = (node_name == self.cfg.GetMasterNode())
    if is_master and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    No locks are acquired: this is a last-resort option and it
    shouldn't block on other jobs.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    @return: the payload of the powercycle RPC call

    """
    node_name = self.op.node_name
    hv_name = self.cfg.GetHypervisorType()

    powercycle = self.rpc.call_node_powercycle(node_name, hv_name)
    powercycle.Raise("Failed to schedule the reboot")
    return powercycle.payload
4315
4316
class LUQueryClusterInfo(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """No locks are needed for this query.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    @rtype: dict
    @return: the software/protocol versions and the cluster-level
        settings, keyed by name

    """
    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors

    # Per-OS hypervisor parameters, restricted to the enabled hypervisors
    os_hvp = dict((os_name,
                   dict((hv_name, hv_params)
                        for (hv_name, hv_params) in hv_dict.items()
                        if hv_name in enabled_hvs))
                  for (os_name, hv_dict) in cluster.os_hvp.items())

    # Parameters of the enabled hypervisors only
    hvparams = dict((hv_name, cluster.hvparams[hv_name])
                    for hv_name in enabled_hvs)

    # Convert ip_family to ip_version
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION
    else:
      primary_ip_version = constants.IP4_VERSION

    return {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": enabled_hvs[0],
      "enabled_hypervisors": enabled_hvs,
      "hvparams": hvparams,
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      }
4380
4381
class LUQueryConfigValues(NoHooksLU):
  """Return configuration values.

  """
  _OP_PARAMS = [_POutputFields]
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    """Check the requested fields against the known ones.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """No locks are needed for this query.

    """
    self.needed_locks = {}

  def _GetFieldValue(self, field):
    """Compute the value of a single output field.

    @type field: string
    @param field: the name of the requested field
    @return: the current value of the field
    @raise errors.ParameterError: if the field is not known

    """
    if field == "cluster_name":
      return self.cfg.GetClusterName()
    if field == "master_node":
      return self.cfg.GetMasterNode()
    if field == "drain_flag":
      return os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
    if field == "watcher_pause":
      return utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
    if field == "volume_group_name":
      return self.cfg.GetVGName()
    raise errors.ParameterError(field)

  def Exec(self, feedback_fn):
    """Compute the values of the requested configuration fields.

    @rtype: list
    @return: one value per requested output field, in the order of the
        request

    """
    return [self._GetFieldValue(field) for field in self.op.output_fields]
4420
4421
class LUActivateInstanceDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  _OP_PARAMS = [
    _PInstanceName,
    ("ignore_size", False, ht.TBool),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its nodes are locked later, in L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Compute the node locks from the locked instance.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and that its
    primary node is online.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    @return: the device information computed by
        L{_AssembleInstanceDisks}
    @raise errors.OpExecError: if the disks could not be assembled

    """
    (disks_ok, disks_info) = \
      _AssembleInstanceDisks(self, self.instance,
                             ignore_size=self.op.ignore_size)
    if disks_ok:
      return disks_info

    raise errors.OpExecError("Cannot activate block devices")
4463
4464
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  The devices are first assembled on all nodes in secondary mode and
  then, in a second pass, on the primary node in primary mode.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors during the first
      (secondary mode) pass won't result in an error return from the
      function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @rtype: tuple
  @return: a tuple of (disks_ok, device_info); disks_ok is False if
      any non-ignored assembly failed, and device_info is a list of
      (primary node, instance_visible_name, node_visible_name) entries,
      one per disk, where node_visible_name is None if the assembly on
      the primary node failed

  """
  pnode = instance.primary_node
  iname = instance.name
  selected = _ExpandCheckDisks(instance, disks)

  success = True
  mapping = []

  def _AssembleOnNode(node, node_disk, as_primary):
    """Run a single assemble call and return its RPC result.

    """
    if ignore_size:
      # work on a copy, so that the configuration object keeps its size
      node_disk = node_disk.Copy()
      node_disk.UnsetSize()
    lu.cfg.SetDiskID(node_disk, node)
    return lu.rpc.call_blockdev_assemble(node, node_disk, iname, as_primary)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for inst_disk in selected:
    for node, node_disk in inst_disk.ComputeNodeTree(pnode):
      err = _AssembleOnNode(node, node_disk, False).fail_msg
      if not err:
        continue
      lu.proc.LogWarning("Could not prepare block device %s on node %s"
                         " (is_primary=False, pass=1): %s",
                         inst_disk.iv_name, node, err)
      if not ignore_secondaries:
        success = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for inst_disk in selected:
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(pnode):
      if node == pnode:
        result = _AssembleOnNode(node, node_disk, True)
        err = result.fail_msg
        if err:
          lu.proc.LogWarning("Could not prepare block device %s on node %s"
                             " (is_primary=True, pass=2): %s",
                             inst_disk.iv_name, node, err)
          success = False
        else:
          dev_path = result.payload

    mapping.append((pnode, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in selected:
    lu.cfg.SetDiskID(disk, pnode)

  return success, mapping
4551
4552
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  If the disks cannot be assembled they are shut down again and an
  error is raised.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks we start
  @param force: passed as C{ignore_secondaries} to
      L{_AssembleInstanceDisks}; unless it is None or true, a hint about
      '--force' is logged on failure
  @raise errors.OpExecError: if the disks could not be assembled

  """
  (disks_ok, _) = _AssembleInstanceDisks(lu, instance,
                                         ignore_secondaries=force)
  if disks_ok:
    return

  _ShutdownInstanceDisks(lu, instance)
  if not (force is None or force):
    lu.proc.LogWarning("", hint="If the message above refers to a"
                       " secondary node,"
                       " you can retry the operation using '--force'.")
  raise errors.OpExecError("Disk consistency error")
4566
4567
class LUDeactivateInstanceDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  _OP_PARAMS = [
    _PInstanceName,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its nodes are locked later, in L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Compute the node locks from the locked instance.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    """
    _SafeShutdownInstanceDisks(self, self.instance)
4602
4603
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance, if it is not running.

  The instance state is verified via L{_CheckInstanceDown} first, and
  only afterwards the work is handed over to
  L{_ShutdownInstanceDisks}.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks we shut down
  @param disks: the disks to shut down (passed on unchanged)

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks)
4613
4614
def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on; all the disks of the
      instance if no selection was passed
  @raise errors.ProgrammerError: if any of the selected disks doesn't
      belong to the instance

  """
  # No selection means all disks of the instance
  if disks is None:
    return instance.disks

  if set(disks).issubset(instance.disks):
    return disks

  raise errors.ProgrammerError("Can only act on disks belonging to the"
                               " target instance")
4631
4632
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is true, errors on the primary node are ignored
  (they are still logged, but don't influence the return value).

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks we shut down
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to shut down (or all, if None)
  @type ignore_primary: boolean
  @param ignore_primary: whether failures on the primary node should
      be left out of the result
  @rtype: boolean
  @return: False if any shutdown which is not ignored has failed,
      True otherwise

  """
  all_result = True
  disks = _ExpandCheckDisks(instance, disks)

  for disk in disks:
    for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if not msg:
        continue
      lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                    disk.iv_name, node, msg)
      # failures always count on secondary nodes; on the primary node
      # only when they are not explicitly ignored
      if not ignore_primary or node != instance.primary_node:
        all_result = False
  return all_result
4656
4657
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  The node is queried for its free memory; if it reports less than the
  requested amount, or if the information cannot be obtained, an
  OpPrereqError exception is raised.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  vg_name = lu.cfg.GetVGName()
  info = lu.rpc.call_node_info([node], vg_name, hypervisor_name)[node]
  info.Raise("Can't get data from node %s" % node,
             prereq=True, ecode=errors.ECODE_ENVIRON)

  free_mem = info.payload.get("memory_free", None)
  if not isinstance(free_mem, int):
    # the node didn't report a usable value
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, free_mem),
                               errors.ECODE_ENVIRON)

  if free_mem < requested:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, free_mem),
                               errors.ECODE_NORES)
4693
4694
def _CheckNodesFreeDisk(lu, nodenames, requested):
  """Checks if nodes have enough free disk space in the default VG.

  All the given nodes are queried for the free space in the volume
  group; if any node reports less than the requested amount, or if the
  information cannot be obtained, an OpPrereqError exception is raised.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk, or
      we cannot check the node

  """
  vg_name = lu.cfg.GetVGName()
  hv_name = lu.cfg.GetHypervisorType()
  all_info = lu.rpc.call_node_info(nodenames, vg_name, hv_name)

  for node in nodenames:
    node_result = all_info[node]
    node_result.Raise("Cannot get current information from node %s" % node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)

    vg_free = node_result.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      # the node didn't report a usable value
      raise errors.OpPrereqError("Can't compute free disk space on node %s,"
                                 " result was '%s'" % (node, vg_free),
                                 errors.ECODE_ENVIRON)

    if vg_free < requested:
      raise errors.OpPrereqError("Not enough disk space on target node %s:"
                                 " required %d MiB, available %d MiB" %
                                 (node, requested, vg_free),
                                 errors.ECODE_NORES)
4729
4730
class LUStartupInstance(LogicalUnit):
  """Logical unit that starts an instance.

  One-off hypervisor and backend parameter overrides may be given in the
  opcode; they are checked here and handed to the start RPC.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    _PForce,
    _PIgnoreOfflineNodes,
    ("hvparams", ht.EmptyDict, ht.TDict),
    ("beparams", ht.EmptyDict, ht.TDict),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    """Enforce the declared types on backend parameter overrides, if any.

    """
    be_overrides = self.op.beparams
    if not be_overrides:
      return
    utils.ForceDictType(be_overrides, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    """Delegate name expansion and locking to the shared instance helper.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list is the master followed by all nodes of the instance; the
    very same list is returned for both node slots of the result.

    """
    hook_env = dict(FORCE=self.op.force)
    hook_env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return hook_env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    Loads the instance from the configuration, validates hypervisor
    parameter overrides and, unless an offline primary node is being
    ignored, checks that the primary node is online, that the instance
    bridges exist and that there is enough free memory to start.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    hv_overrides = self.op.hvparams
    if hv_overrides:
      # the overrides are syntax-checked locally on top of the filled
      # hypervisor parameters before the nodes are asked to verify them
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(hv_overrides, constants.HVS_PARAMETER_TYPES)
      effective_hvp = cluster.FillHV(inst)
      effective_hvp.update(hv_overrides)
      hv_class = hypervisor.GetHypervisor(inst.hypervisor)
      hv_class.CheckParameterSyntax(effective_hvp)
      _CheckHVParams(self, inst.all_nodes, inst.hypervisor, effective_hvp)

    self.primary_offline = self.cfg.GetNodeInfo(inst.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
      return

    _CheckNodeOnline(self, inst.primary_node)

    be_full = self.cfg.GetClusterInfo().FillBE(inst)

    _CheckInstanceBridgesExist(self, inst)

    info = self.rpc.call_instance_info(inst.primary_node, inst.name,
                                       inst.hypervisor)
    info.Raise("Error checking node %s" % inst.primary_node,
               prereq=True, ecode=errors.ECODE_ENVIRON)
    if info.payload:
      # a non-empty payload means the instance is running already, so no
      # memory check is needed
      return

    _CheckNodeFreeMemory(self, inst.primary_node,
                         "starting instance %s" % inst.name,
                         be_full[constants.BE_MEMORY], inst.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    The instance is marked as up in the configuration first; if the
    primary node is offline nothing else is done.  Otherwise the disks are
    started and the start RPC is issued, shutting the disks down again if
    that RPC fails.

    """
    inst = self.instance
    force_start = self.op.force

    self.cfg.MarkInstanceUp(inst.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
      return

    target_node = inst.primary_node

    _StartInstanceDisks(self, inst, force_start)

    start_result = self.rpc.call_instance_start(target_node, inst,
                                                self.op.hvparams,
                                                self.op.beparams)
    failure = start_result.fail_msg
    if failure:
      _ShutdownInstanceDisks(self, inst)
      raise errors.OpExecError("Could not start instance: %s" % failure)
4837
4838
class LURebootInstance(LogicalUnit):
  """Logical unit that reboots an instance.

  Soft and hard reboots are delegated to the reboot RPC; any other reboot
  type is carried out as shutdown, disk restart and start.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("ignore_secondaries", False, ht.TBool),
    ("reboot_type", ht.NoDefault, ht.TElemOf(constants.REBOOT_TYPES)),
    _PShutdownTimeout,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    """Delegate name expansion and locking to the shared instance helper.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list is the master followed by all nodes of the instance; the
    very same list is returned for both node slots of the result.

    """
    hook_env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    hook_env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return hook_env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    Loads the instance from the configuration and requires its primary
    node to be online and its bridges to exist.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, inst.primary_node)
    _CheckInstanceBridgesExist(self, inst)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    After a successful reboot of either flavour the instance is marked as
    up in the configuration.

    """
    inst = self.instance
    skip_secondaries = self.op.ignore_secondaries
    kind = self.op.reboot_type

    pnode = inst.primary_node

    if kind not in (constants.INSTANCE_REBOOT_SOFT,
                    constants.INSTANCE_REBOOT_HARD):
      # full reboot: stop the instance, cycle its disks, start it again
      stop_result = self.rpc.call_instance_shutdown(pnode, inst,
                                                    self.op.shutdown_timeout)
      stop_result.Raise("Could not shutdown instance for full reboot")
      _ShutdownInstanceDisks(self, inst)
      _StartInstanceDisks(self, inst, skip_secondaries)
      start_result = self.rpc.call_instance_start(pnode, inst, None, None)
      failure = start_result.fail_msg
      if failure:
        _ShutdownInstanceDisks(self, inst)
        raise errors.OpExecError(
          "Could not start instance for full reboot: %s" % failure)
    else:
      # soft/hard reboot: handled by a single RPC on the primary node
      for dev in inst.disks:
        self.cfg.SetDiskID(dev, pnode)
      reboot_result = self.rpc.call_instance_reboot(pnode, inst, kind,
                                                    self.op.shutdown_timeout)
      reboot_result.Raise("Could not reboot instance")

    self.cfg.MarkInstanceUp(inst.name)
4918
4919
class LUShutdownInstance(LogicalUnit):
  """Logical unit that shuts down an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    _PIgnoreOfflineNodes,
    ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, ht.TPositiveInt),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    """Delegate name expansion and locking to the shared instance helper.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list is the master followed by all nodes of the instance; the
    very same list is returned for both node slots of the result.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["TIMEOUT"] = self.op.timeout

    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return hook_env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    Loads the instance from the configuration and records whether its
    primary node is offline; an offline primary node is only tolerated
    when the opcode asks to ignore offline nodes.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    pnode_info = self.cfg.GetNodeInfo(self.instance.primary_node)
    self.primary_offline = pnode_info.offline

    if not self.primary_offline or not self.op.ignore_offline_nodes:
      _CheckNodeOnline(self, self.instance.primary_node)
    else:
      self.proc.LogWarning("Ignoring offline primary node")

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    The instance is marked as down in the configuration first.  A failing
    shutdown RPC only produces a warning; the disks are shut down
    regardless.

    """
    inst = self.instance
    pnode = inst.primary_node
    shutdown_timeout = self.op.timeout

    self.cfg.MarkInstanceDown(inst.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
      return

    stop_result = self.rpc.call_instance_shutdown(pnode, inst,
                                                  shutdown_timeout)
    failure = stop_result.fail_msg
    if failure:
      self.proc.LogWarning("Could not shutdown instance: %s" % failure)

    _ShutdownInstanceDisks(self, inst)
4985
4986
class LUReinstallInstance(LogicalUnit):
  """Logical unit that reinstalls the OS of an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("os_type", None, ht.TMaybeString),
    ("force_variant", False, ht.TBool),
    ("osparams", None, ht.TOr(ht.TDict, ht.TNone)),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    """Delegate name expansion and locking to the shared instance helper.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list is the master followed by all nodes of the instance; the
    very same list is returned for both node slots of the result.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)

    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return hook_env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    Requires all nodes of the instance to be online, the instance to have
    disks and to be down.  A new OS type, if requested, is checked on the
    primary node, and OS parameter overrides are merged with the
    instance's own and verified.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, inst.primary_node,
                     "Instance primary node offline, cannot reinstall")
    for snode in inst.secondary_nodes:
      _CheckNodeOnline(self, snode,
                       "Instance secondary node offline, cannot reinstall")

    if inst.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, inst, "cannot reinstall")

    if self.op.os_type is None:
      # no OS change requested, keep the current one
      effective_os = inst.os
    else:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, inst.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      effective_os = self.op.os_type

    all_nodes = list(inst.all_nodes)

    if self.op.osparams:
      merged_osparams = _GetUpdatedParams(inst.osparams, self.op.osparams)
      _CheckOSParams(self, True, all_nodes, effective_os, merged_osparams)
      # the merged dict holds no defaults
      self.os_inst = merged_osparams
    else:
      self.os_inst = None

    self.instance = inst

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    A changed OS type is written to the configuration before the OS
    create scripts run; the disks are shut down again whether or not the
    scripts succeed.

    """
    inst = self.instance
    new_os = self.op.os_type

    if new_os is not None:
      feedback_fn("Changing OS to '%s'..." % new_os)
      inst.os = new_os
      # Write to configuration
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      add_result = self.rpc.call_instance_os_add(inst.primary_node, inst,
                                                 True, self.op.debug_level,
                                                 osparams=self.os_inst)
      add_result.Raise("Could not install OS for instance %s on node %s" %
                       (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)
5077
5078
class LURecreateInstanceDisks(LogicalUnit):
  """Logical unit that recreates the missing disks of an instance.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("disks", ht.EmptyList, ht.TListOf(ht.TPositiveInt)),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    """Delegate name expansion and locking to the shared instance helper.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list is the master followed by all nodes of the instance; the
    very same list is returned for both node slots of the result.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)

    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return hook_env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    Requires the primary node to be online, the instance to have disks
    and to be down.  An empty disk list in the opcode is replaced by all
    disk indices; otherwise every given index must exist.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, inst.primary_node)

    if inst.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    _CheckInstanceDown(self, inst, "cannot recreate disks")

    if self.op.disks:
      for disk_idx in self.op.disks:
        if disk_idx >= len(inst.disks):
          raise errors.OpPrereqError("Invalid disk index passed '%s'" %
                                     disk_idx, errors.ECODE_INVAL)
    else:
      self.op.disks = range(len(inst.disks))

    self.instance = inst

  def Exec(self, feedback_fn):
    """Recreate the disks.

    Every disk index that was not requested is passed to the disk
    creation helper as one to skip.

    """
    skipped = [disk_idx
               for disk_idx in range(len(self.instance.disks))
               if disk_idx not in self.op.disks]

    _CreateDisks(self, self.instance, to_skip=skipped)
5141
5142
class LURenameInstance(LogicalUnit):
  """Logical unit that renames an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("new_name", ht.NoDefault, ht.TNonEmptyString),
    ("ip_check", False, ht.TBool),
    ("name_check", True, ht.TBool),
    ]

  def CheckArguments(self):
    """Check arguments.

    An IP check is only accepted together with a name check.

    """
    if not self.op.ip_check:
      return
    if self.op.name_check:
      return
    # TODO: make the ip check more flexible and not depend on the name check
    raise errors.OpPrereqError("Cannot do ip check without a name check",
                               errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list is the master followed by all nodes of the instance; the
    very same list is returned for both node slots of the result.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["INSTANCE_NEW_NAME"] = self.op.new_name

    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return hook_env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    Requires the primary node to be online and the instance to be down.
    With name checking enabled the new name is replaced by the resolved
    hostname and, if requested, its IP must not answer on the node daemon
    port.  The new name must not belong to an existing instance.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None
    _CheckNodeOnline(self, inst.primary_node)
    _CheckInstanceDown(self, inst, "cannot rename")
    self.instance = inst

    target_name = self.op.new_name
    if self.op.name_check:
      resolved = netutils.GetHostname(name=target_name)
      target_name = resolved.name
      self.op.new_name = target_name
      if self.op.ip_check:
        if netutils.TcpPing(resolved.ip, constants.DEFAULT_NODED_PORT):
          raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                     (resolved.ip, target_name),
                                     errors.ECODE_NOTUNIQUE)

    known_instances = self.cfg.GetInstanceList()
    if target_name in known_instances:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 target_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Rename the instance.

    The configuration and the instance lock are switched to the new name
    first; for file-based instances the storage directory is renamed on
    the primary node, and finally the OS rename script is run with the
    disks activated.  Returns the name of the re-read instance.

    """
    inst = self.instance
    old_name = inst.name

    if inst.disk_template == constants.DT_FILE:
      old_dir = os.path.dirname(inst.disks[0].logical_id[1])

    self.cfg.RenameInstance(inst.name, self.op.new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL
    lock_manager = self.context.glm
    lock_manager.remove(locking.LEVEL_INSTANCE, old_name)
    self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)

    # re-read the instance from the configuration after rename
    inst = self.cfg.GetInstanceInfo(self.op.new_name)

    if inst.disk_template == constants.DT_FILE:
      new_dir = os.path.dirname(inst.disks[0].logical_id[1])
      mv_result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
                                                        old_dir, new_dir)
      mv_result.Raise("Could not rename on node %s directory '%s' to '%s'"
                      " (but the instance has been renamed in Ganeti)" %
                      (inst.primary_node, old_dir, new_dir))

    _StartInstanceDisks(self, inst, None)
    try:
      script_result = self.rpc.call_instance_run_rename(inst.primary_node,
                                                        inst, old_name,
                                                        self.op.debug_level)
      failure = script_result.fail_msg
      if failure:
        warning = ("Could not run OS rename script for instance %s on node %s"
                   " (but the instance has been renamed in Ganeti): %s" %
                   (inst.name, inst.primary_node, failure))
        self.proc.LogWarning(warning)
    finally:
      _ShutdownInstanceDisks(self, inst)

    return inst.name
5247
5248
class LURemoveInstance(LogicalUnit):
  """Logical unit that removes an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("ignore_failures", False, ht.TBool),
    _PShutdownTimeout,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance name and request node locks to be recalculated.

    """
    self._ExpandAndLockInstance()
    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Lock the nodes of the instance once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The first node list holds only the master; the second one lists all
    nodes of the instance followed by the master.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout

    master_only = [self.cfg.GetMasterNode()]
    post_nodes = list(self.instance.all_nodes) + master_only
    return hook_env, master_only, post_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    Loads the instance from the configuration.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    The instance is shut down first; a failed shutdown aborts the removal
    unless failures are to be ignored, in which case only a warning is
    reported.

    """
    inst = self.instance
    logging.info("Shutting down instance %s on node %s",
                 inst.name, inst.primary_node)

    stop_result = self.rpc.call_instance_shutdown(inst.primary_node, inst,
                                                  self.op.shutdown_timeout)
    failure = stop_result.fail_msg
    if failure:
      if not self.op.ignore_failures:
        raise errors.OpExecError(
          "Could not shutdown instance %s on node %s: %s" %
          (inst.name, inst.primary_node, failure))
      feedback_fn("Warning: can't shutdown instance: %s" % failure)

    _RemoveInstance(self, feedback_fn, inst, self.op.ignore_failures)
5313
5314
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Remove an instance's disks and drop it from the configuration.

  A failed disk removal raises L{errors.OpExecError} unless
  C{ignore_failures} is set, in which case it is only reported through
  C{feedback_fn}.  The instance name is finally stored in the logical
  unit's C{remove_locks} for the instance level.

  """
  inst_name = instance.name
  logging.info("Removing block devices for instance %s", inst_name)

  disks_removed = _RemoveDisks(lu, instance)
  if not disks_removed:
    if ignore_failures:
      feedback_fn("Warning: can't remove instance's disks")
    else:
      raise errors.OpExecError("Can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  # nothing else may have claimed the instance-level removal slot
  assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
    "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5335
5336
class LUQueryInstances(NoHooksLU):
  """Logical unit for querying instances.

  For every selected instance, one value per requested output field is
  computed from the configuration and, for some fields, from data
  fetched from the instances' primary nodes.

  """
  # pylint: disable-msg=W0142
  _OP_PARAMS = [
    ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
    ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("use_locking", False, ht.TBool),
    ]
  REQ_BGL = False
  # fields which Exec reads straight off the instance object via getattr
  _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
                    "serial_no", "ctime", "mtime", "uuid"]
  # field names/patterns declared as static; requested fields not matching
  # this set are what makes Exec query the nodes (see ExpandNames)
  _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
                                    "admin_state",
                                    "disk_template", "ip", "mac", "bridge",
                                    "nic_mode", "nic_link",
                                    "sda_size", "sdb_size", "vcpus", "tags",
                                    "network_port", "beparams",
                                    r"(disk)\.(size)/([0-9]+)",
                                    r"(disk)\.(sizes)", "disk_usage",
                                    r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
                                    r"(nic)\.(bridge)/([0-9]+)",
                                    r"(nic)\.(macs|ips|modes|links|bridges)",
                                    r"(disk|nic)\.(count)",
                                    "hvparams", "custom_hvparams",
                                    "custom_beparams", "custom_nicparams",
                                    ] + _SIMPLE_FIELDS +
                                  ["hv/%s" % name
                                   for name in constants.HVS_PARAMETERS
                                   if name not in constants.HVC_GLOBALS] +
                                  ["be/%s" % name
                                   for name in constants.BES_PARAMETERS])
  # fields whose values Exec derives from the per-node instance data
  _FIELDS_DYNAMIC = utils.FieldSet("oper_state",
                                   "oper_ram",
                                   "oper_vcpus",
                                   "status")


  def CheckArguments(self):
    """Pass the requested output fields to the common field check.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Compute the wanted instances and decide whether to lock.

    Locks (shared, at instance and node level) are only requested when
    some requested field does not match the static set and the opcode
    asks for locking.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_INSTANCE] = 1
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.names:
      self.wanted = _GetWantedInstances(self, self.op.names)
    else:
      # no names given: all instances are wanted
      self.wanted = locking.ALL_SET

    # NOTE(review): NonMatching presumably returns the requested fields
    # that are not covered by the static set; its result is only used for
    # its truth value here -- confirm against utils.FieldSet
    self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
    self.do_locking = self.do_node_query and self.op.use_locking
    if self.do_locking:
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Lock the instances' nodes at node level, if locking is in use.

    """
    if level == locking.LEVEL_NODE and self.do_locking:
      self._LockInstancesNodes()

  def Exec(self, feedback_fn):
    """Computes the list of instances and their attributes.

    @return: a list with one entry per instance, each entry being the
        list of values for the requested output fields, in the order
        the fields were requested

    """
    # pylint: disable-msg=R0912
    # way too many branches here
    all_info = self.cfg.GetAllInstancesInfo()
    if self.wanted == locking.ALL_SET:
      # caller didn't specify instance names, so ordering is not important
      if self.do_locking:
        instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        instance_names = all_info.keys()
      instance_names = utils.NiceSort(instance_names)
    else:
      # caller did specify names, so we must keep the ordering
      if self.do_locking:
        tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
      else:
        tgt_set = all_info.keys()
      # wanted names which are neither locked nor in the configuration
      missing = set(self.wanted).difference(tgt_set)
      if missing:
        raise errors.OpExecError("Some instances were removed before"
                                 " retrieving their data: %s" % missing)
      instance_names = self.wanted

    instance_list = [all_info[iname] for iname in instance_names]

    # begin data gathering

    # only primary nodes are queried, once per distinct node/hypervisor
    nodes = frozenset([inst.primary_node for inst in instance_list])
    hv_list = list(set([inst.hypervisor for inst in instance_list]))

    # bad_nodes: nodes whose RPC result carries a failure message
    # off_nodes: nodes whose RPC result is flagged as offline
    bad_nodes = []
    off_nodes = []
    if self.do_node_query:
      # live_data is looked up by instance name further below
      live_data = {}
      node_data = self.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          off_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        else:
          if result.payload:
            live_data.update(result.payload)
          # else no instance is alive
    else:
      # no node query needed: use an empty entry for every instance
      live_data = dict([(name, {}) for name in instance_names])

    # end data gathering

    HVPREFIX = "hv/"
    BEPREFIX = "be/"
    output = []
    cluster = self.cfg.GetClusterInfo()
    for instance in instance_list:
      iout = []
      # filled parameter dicts, computed once per instance
      i_hv = cluster.FillHV(instance, skip_globals=True)
      i_be = cluster.FillBE(instance)
      i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
      for field in self.op.output_fields:
        # match object for pattern-style fields, used by the final branches
        st_match = self._FIELDS_STATIC.Matches(field)
        if field in self._SIMPLE_FIELDS:
          val = getattr(instance, field)
        elif field == "pnode":
          val = instance.primary_node
        elif field == "snodes":
          val = list(instance.secondary_nodes)
        elif field == "admin_state":
          val = instance.admin_up
        elif field == "oper_state":
          # None when the primary node could not be queried
          if instance.primary_node in bad_nodes:
            val = None
          else:
            val = bool(live_data.get(instance.name))
        elif field == "status":
          # node problems take precedence; otherwise combine the observed
          # running state with the configured admin state
          if instance.primary_node in off_nodes:
            val = "ERROR_nodeoffline"
          elif instance.primary_node in bad_nodes:
            val = "ERROR_nodedown"
          else:
            running = bool(live_data.get(instance.name))
            if running:
              if instance.admin_up:
                val = "running"
              else:
                val = "ERROR_up"
            else:
              if instance.admin_up:
                val = "ERROR_down"
              else:
                val = "ADMIN_down"
        elif field == "oper_ram":
          # "?" if the live data lacks the key, "-" if there is no live data
          if instance.primary_node in bad_nodes:
            val = None
          elif instance.name in live_data:
            val = live_data[instance.name].get("memory", "?")
          else:
            val = "-"
        elif field == "oper_vcpus":
          if instance.primary_node in bad_nodes:
            val = None
          elif instance.name in live_data:
            val = live_data[instance.name].get("vcpus", "?")
          else:
            val = "-"
        elif field == "vcpus":
          val = i_be[constants.BE_VCPUS]
        elif field == "disk_template":
          val = instance.disk_template
        # the unindexed NIC fields below all refer to the first NIC and
        # yield None for instances without NICs
        elif field == "ip":
          if instance.nics:
            val = instance.nics[0].ip
          else:
            val = None
        elif field == "nic_mode":
          if instance.nics:
            val = i_nicp[0][constants.NIC_MODE]
          else:
            val = None
        elif field == "nic_link":
          if instance.nics:
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "bridge":
          # only reported when the first NIC is in bridged mode
          if (instance.nics and
              i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
            val = i_nicp[0][constants.NIC_LINK]
          else:
            val = None
        elif field == "mac":
          if instance.nics:
            val = instance.nics[0].mac
          else:
            val = None
        elif field == "custom_nicparams":
          val = [nic.nicparams for nic in instance.nics]
        elif field == "sda_size" or field == "sdb_size":
          # the third character is the disk letter: 'a' -> 0, 'b' -> 1
          idx = ord(field[2]) - ord('a')
          try:
            val = instance.FindDisk(idx).size
          except errors.OpPrereqError:
            val = None
        elif field == "disk_usage": # total disk usage per node
          disk_sizes = [{'size': disk.size} for disk in instance.disks]
          val = _ComputeDiskSize(instance.disk_template, disk_sizes)
        elif field == "tags":
          val = list(instance.GetTags())
        elif field == "custom_hvparams":
          val = instance.hvparams # not filled!
        elif field == "hvparams":
          val = i_hv
        elif (field.startswith(HVPREFIX) and
              field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
              field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
          # single hypervisor parameter, taken from the filled dict
          val = i_hv.get(field[len(HVPREFIX):], None)
        elif field == "custom_beparams":
          val = instance.beparams
        elif field == "beparams":
          val = i_be
        elif (field.startswith(BEPREFIX) and
              field[len(BEPREFIX):] in constants.BES_PARAMETERS):
          # single backend parameter, taken from the filled dict
          val = i_be.get(field[len(BEPREFIX):], None)
        elif st_match and st_match.groups():
          # matches a variable list
          # groups are (kind, attribute[, index]) as captured by the
          # patterns in _FIELDS_STATIC
          st_groups = st_match.groups()
          if st_groups and st_groups[0] == "disk":
            if st_groups[1] == "count":
              val = len(instance.disks)
            elif st_groups[1] == "sizes":
              val = [disk.size for disk in instance.disks]
            elif st_groups[1] == "size":
              try:
                val = instance.FindDisk(st_groups[2]).size
              except errors.OpPrereqError:
                val = None
            else:
              assert False, "Unhandled disk parameter"
          elif st_groups[0] == "nic":
            if st_groups[1] == "count":
              val = len(instance.nics)
            elif st_groups[1] == "macs":
              val = [nic.mac for nic in instance.nics]
            elif st_groups[1] == "ips":
              val = [nic.ip for nic in instance.nics]
            elif st_groups[1] == "modes":
              val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
            elif st_groups[1] == "links":
              val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
            elif st_groups[1] == "bridges":
              # one entry per NIC, None for NICs not in bridged mode
              val = []
              for nicp in i_nicp:
                if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
                  val.append(nicp[constants.NIC_LINK])
                else:
                  val.append(None)
            else:
              # index-based item
              nic_idx = int(st_groups[2])
              if nic_idx >= len(instance.nics):
                # index beyond the instance's NICs
                val = None
              else:
                if st_groups[1] == "mac":
                  val = instance.nics[nic_idx].mac
                elif st_groups[1] == "ip":
                  val = instance.nics[nic_idx].ip
                elif st_groups[1] == "mode":
                  val = i_nicp[nic_idx][constants.NIC_MODE]
                elif st_groups[1] == "link":
                  val = i_nicp[nic_idx][constants.NIC_LINK]
                elif st_groups[1] == "bridge":
                  nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
                  if nic_mode == constants.NIC_MODE_BRIDGED:
                    val = i_nicp[nic_idx][constants.NIC_LINK]
                  else:
                    val = None
                else:
                  assert False, "Unhandled NIC parameter"
          else:
            assert False, ("Declared but unhandled variable parameter '%s'" %
                           field)
        else:
          assert False, "Declared but unhandled parameter '%s'" % field
        iout.append(val)
      output.append(iout)

    return output
5633
5634
class LUFailoverInstance(LogicalUnit):
  """Failover an instance to its secondary node.

  The instance is shut down on its current primary node, the primary
  role is handed over to the first secondary node and, if the instance
  is marked as up, it is started there.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("ignore_consistency", False, ht.TBool),
    _PShutdownTimeout,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and set up the (initially empty) node lock level.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare the instance's node locks when the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The pre-hook node list holds the master and the secondary nodes of the
    instance; the post-hook list additionally holds the old primary node.

    @return: tuple of (environment, pre-hook nodes, post-hook nodes)

    """
    inst = self.instance
    old_primary = inst.primary_node
    old_secondary = inst.secondary_nodes[0]

    # failover swaps the roles of the two nodes
    env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": old_primary,
      "OLD_SECONDARY": old_secondary,
      "NEW_PRIMARY": old_secondary,
      "NEW_SECONDARY": old_primary,
      }
    env.update(_BuildInstanceHookEnvByObject(self, inst))

    pre_nodes = [self.cfg.GetMasterNode()]
    pre_nodes.extend(inst.secondary_nodes)
    post_nodes = pre_nodes + [old_primary]
    return env, pre_nodes, post_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    This verifies that the instance uses a network-mirrored disk template
    and has a secondary node, and that this node is online, not drained,
    has enough free memory (checked only if the instance is marked up) and
    passes the instance bridge check.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    filled_be = self.cfg.GetClusterInfo().FillBE(inst)
    if inst.disk_template not in constants.DTS_NET_MIRROR:
      raise errors.OpPrereqError("Instance's disk layout is not"
                                 " network mirrored, cannot failover.",
                                 errors.ECODE_STATE)

    secondaries = inst.secondary_nodes
    if not secondaries:
      raise errors.ProgrammerError("no secondary node but using "
                                   "a mirrored disk template")

    new_primary = secondaries[0]
    _CheckNodeOnline(self, new_primary)
    _CheckNodeNotDrained(self, new_primary)

    if not inst.admin_up:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")
    else:
      # the instance will be started there, so the memory must be available
      reason = "failing over instance %s" % inst.name
      _CheckNodeFreeMemory(self, new_primary, reason,
                           filled_be[constants.BE_MEMORY], inst.hypervisor)

    # the bridges used by the instance must exist on the new primary
    _CheckInstanceBridgesExist(self, inst, node=new_primary)

  def Exec(self, feedback_fn):
    """Failover an instance.

    The failover is done by shutting it down on its present node and
    starting it on the secondary.

    @param feedback_fn: function used to report progress to the caller
    @raise errors.OpExecError: if a disk is degraded on the target node
        (unless ignore_consistency is set), if the shutdown fails (unless
        ignore_consistency is set or the primary node is offline), or if
        the disks or the instance cannot be (de)activated/started

    """
    inst = self.instance
    old_primary_info = self.cfg.GetNodeInfo(inst.primary_node)

    old_primary = inst.primary_node
    new_primary = inst.secondary_nodes[0]

    if not inst.admin_up:
      feedback_fn("* not checking disk consistency as instance is not running")
    else:
      feedback_fn("* checking disk consistency between source and target")
      for disk in inst.disks:
        # for drbd, these are drbd over lvm
        if _CheckDiskConsistency(self, disk, new_primary, False):
          continue
        if not self.op.ignore_consistency:
          raise errors.OpExecError("Disk %s is degraded on target node,"
                                   " aborting failover." % disk.iv_name)

    feedback_fn("* shutting down instance on source node")
    logging.info("Shutting down instance %s on node %s",
                 inst.name, old_primary)

    shutdown_res = self.rpc.call_instance_shutdown(old_primary, inst,
                                                   self.op.shutdown_timeout)
    shutdown_err = shutdown_res.fail_msg
    if shutdown_err:
      if not (self.op.ignore_consistency or old_primary_info.offline):
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (inst.name, old_primary, shutdown_err))
      self.proc.LogWarning("Could not shutdown instance %s on node %s."
                           " Proceeding anyway. Please make sure node"
                           " %s is down. Error details: %s",
                           inst.name, old_primary, old_primary, shutdown_err)

    feedback_fn("* deactivating the instance's disks on source node")
    disks_down = _ShutdownInstanceDisks(self, inst, ignore_primary=True)
    if not disks_down:
      raise errors.OpExecError("Can't shut down the instance's disks.")

    # switch the primary role and push the new config to the other nodes
    inst.primary_node = new_primary
    self.cfg.Update(inst, feedback_fn)

    # an instance marked as down is left stopped on the new primary
    if not inst.admin_up:
      return

    feedback_fn("* activating the instance's disks on target node")
    logging.info("Starting instance %s on node %s",
                 inst.name, new_primary)

    assembled = _AssembleInstanceDisks(self, inst, ignore_secondaries=True)
    disks_ok, _ = assembled
    if not disks_ok:
      _ShutdownInstanceDisks(self, inst)
      raise errors.OpExecError("Can't activate the instance's disks")

    feedback_fn("* starting the instance on the target node")
    start_res = self.rpc.call_instance_start(new_primary, inst, None, None)
    start_err = start_res.fail_msg
    if start_err:
      _ShutdownInstanceDisks(self, inst)
      raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                               (inst.name, new_primary, start_err))
5785
5786
class LUMigrateInstance(LogicalUnit):
  """Migrate an instance.

  Unlike the failover, which shuts the instance down first, the
  migration is done without shutting the instance down. The actual work
  is delegated to a L{TLMigrateInstance} tasklet.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    _PMigrationMode,
    _PMigrationLive,
    ("cleanup", False, ht.TBool),
    ]

  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and register the migration tasklet.

    """
    self._ExpandAndLockInstance()

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    migrater = TLMigrateInstance(self, self.op.instance_name, self.op.cleanup)
    self._migrater = migrater
    self.tasklets = [migrater]

  def DeclareLocks(self, level):
    """Declare the instance's node locks when the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The pre-hook node list holds the master and the secondary nodes of the
    instance; the post-hook list additionally holds the old primary node.

    @return: tuple of (environment, pre-hook nodes, post-hook nodes)

    """
    inst = self._migrater.instance
    old_primary = inst.primary_node
    old_secondary = inst.secondary_nodes[0]

    env = _BuildInstanceHookEnvByObject(self, inst)
    # migration-specific values override anything set by the generic helper
    env.update({
      "MIGRATE_LIVE": self._migrater.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": old_primary,
      "OLD_SECONDARY": old_secondary,
      "NEW_PRIMARY": old_secondary,
      "NEW_SECONDARY": old_primary,
      })

    pre_nodes = [self.cfg.GetMasterNode()]
    pre_nodes.extend(inst.secondary_nodes)
    post_nodes = pre_nodes + [old_primary]
    return env, pre_nodes, post_nodes
5841
5842
class LUMoveInstance(LogicalUnit):
  """Move an instance by data-copying.

  The instance is shut down on its current primary node, its disks are
  re-created on the target node and filled by copying the data over, and
  the instance is then (if marked as up) started on the target node.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("target_node", ht.NoDefault, ht.TNonEmptyString),
    _PShutdownTimeout,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and the (expanded) target node.

    """
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    """Add the instance's primary node to the node locks.

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary node of the instance and the
    target node of the move.

    @return: tuple of (environment, pre-hook nodes, post-hook nodes); the
        same node list is used for both hook phases

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
                                       self.op.target_node]
    return env, nl, nl

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the target node differs from the current primary
    node, that all disks are of a plain (LV or file) type, and that the
    target node is online, not drained, vm-capable, has enough free memory
    (checked only if the instance is marked up) and passes the instance
    bridge check.

    @raise errors.OpPrereqError: if the instance is already on the target
        node or one of its disks has a layout which cannot be copied

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    # only disk types that can be copied as-is are supported
    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    _CheckNodeVmCapable(self, target_node)

    if instance.admin_up:
      # check memory requirements on the target node
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    @param feedback_fn: function used to report progress to the caller
    @raise errors.OpExecError: if the instance cannot be shut down (and
        ignore_consistency is not set on the opcode), if the disk copy
        fails, or if the disks or the instance cannot be activated/started
        on the target node

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      # "ignore_consistency" is not declared in this LU's _OP_PARAMS, so the
      # opcode may not carry the attribute at all; reading it directly could
      # raise AttributeError here and hide the real shutdown failure.
      # Treat a missing attribute as "do not ignore".
      if getattr(self.op, "ignore_consistency", False):
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over; stop at the first failure
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    # the data is on the target node now: make it the primary node
    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
6021
6022
class LUMigrateNode(LogicalUnit):
  """Migrate all instances from a node.

  One L{TLMigrateInstance} tasklet is created for each instance returned
  by L{_GetNodePrimaryInstances} for the node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  _OP_PARAMS = [
    _PNodeName,
    _PMigrationMode,
    _PMigrationLive,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the node and its instances, and create the migration tasklets.

    """
    node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    self.op.node_name = node_name

    self.needed_locks = {
      locking.LEVEL_NODE: [node_name],
      }
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # one (non-cleanup) migration tasklet per instance on the node
    instance_names = []
    migraters = []
    for inst in _GetNodePrimaryInstances(self.cfg, node_name):
      logging.debug("Migrating instance %s", inst.name)
      instance_names.append(inst.name)
      migraters.append(TLMigrateInstance(self, inst.name, False))

    self.tasklets = migraters

    # the instances to be migrated must be locked as well
    self.needed_locks[locking.LEVEL_INSTANCE] = instance_names

  def DeclareLocks(self, level):
    """Declare the instances' node locks when the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The hooks node list built here contains only the master node.

    @return: tuple of (environment, pre-hook nodes, post-hook nodes); the
        same node list is used for both hook phases

    """
    env = {
      "NODE_NAME": self.op.node_name,
      }
    master_only = [self.cfg.GetMasterNode()]
    return (env, master_only, master_only)
6077
6078
6079 class TLMigrateInstance(Tasklet):
6080   """Tasklet class for instance migration.
6081
6082   @type live: boolean
6083   @ivar live: whether the migration will be done live or non-live;
6084       this variable is initalized only after CheckPrereq has run
6085
6086   """
6087   def __init__(self, lu, instance_name, cleanup):
6088     """Initializes this class.
6089
6090     """
6091     Tasklet.__init__(self, lu)
6092
6093     # Parameters
6094     self.instance_name = instance_name
6095     self.cleanup = cleanup
6096     self.live = False # will be overridden later
6097
6098   def CheckPrereq(self):
6099     """Check prerequisites.
6100
6101     This checks that the instance is in the cluster.
6102
6103     """
6104     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6105     instance = self.cfg.GetInstanceInfo(instance_name)
6106     assert instance is not None
6107
6108     if instance.disk_template != constants.DT_DRBD8:
6109       raise errors.OpPrereqError("Instance's disk layout is not"
6110                                  " drbd8, cannot migrate.", errors.ECODE_STATE)
6111
6112     secondary_nodes = instance.secondary_nodes
6113     if not secondary_nodes:
6114       raise errors.ConfigurationError("No secondary node but using"
6115                                       " drbd8 disk template")
6116
6117     i_be = self.cfg.GetClusterInfo().FillBE(instance)
6118
6119     target_node = secondary_nodes[0]
6120     # check memory requirements on the secondary node
6121     _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6122                          instance.name, i_be[constants.BE_MEMORY],
6123                          instance.hypervisor)
6124
6125     # check bridge existance
6126     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6127
6128     if not self.cleanup:
6129       _CheckNodeNotDrained(self.lu, target_node)
6130       result = self.rpc.call_instance_migratable(instance.primary_node,
6131                                                  instance)
6132       result.Raise("Can't migrate, please use failover",
6133                    prereq=True, ecode=errors.ECODE_STATE)
6134
6135     self.instance = instance
6136
6137     if self.lu.op.live is not None and self.lu.op.mode is not None:
6138       raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6139                                  " parameters are accepted",
6140                                  errors.ECODE_INVAL)
6141     if self.lu.op.live is not None:
6142       if self.lu.op.live:
6143         self.lu.op.mode = constants.HT_MIGRATION_LIVE
6144       else:
6145         self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6146       # reset the 'live' parameter to None so that repeated
6147       # invocations of CheckPrereq do not raise an exception
6148       self.lu.op.live = None
6149     elif self.lu.op.mode is None:
6150       # read the default value from the hypervisor
6151       i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
6152       self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6153
6154     self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6155
6156   def _WaitUntilSync(self):
6157     """Poll with custom rpc for disk sync.
6158
6159     This uses our own step-based rpc call.
6160
6161     """
6162     self.feedback_fn("* wait until resync is done")
6163     all_done = False
6164     while not all_done:
6165       all_done = True
6166       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6167                                             self.nodes_ip,
6168                                             self.instance.disks)
6169       min_percent = 100
6170       for node, nres in result.items():
6171         nres.Raise("Cannot resync disks on node %s" % node)
6172         node_done, node_percent = nres.payload
6173         all_done = all_done and node_done
6174         if node_percent is not None:
6175           min_percent = min(min_percent, node_percent)
6176       if not all_done:
6177         if min_percent < 100:
6178           self.feedback_fn("   - progress: %.1f%%" % min_percent)
6179         time.sleep(2)
6180
6181   def _EnsureSecondary(self, node):
6182     """Demote a node to secondary.
6183
6184     """
6185     self.feedback_fn("* switching node %s to secondary mode" % node)
6186
6187     for dev in self.instance.disks:
6188       self.cfg.SetDiskID(dev, node)
6189
6190     result = self.rpc.call_blockdev_close(node, self.instance.name,
6191                                           self.instance.disks)
6192     result.Raise("Cannot change disk to secondary on node %s" % node)
6193
6194   def _GoStandalone(self):
6195     """Disconnect from the network.
6196
6197     """
6198     self.feedback_fn("* changing into standalone mode")
6199     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6200                                                self.instance.disks)
6201     for node, nres in result.items():
6202       nres.Raise("Cannot disconnect disks node %s" % node)
6203
6204   def _GoReconnect(self, multimaster):
6205     """Reconnect to the network.
6206
6207     """
6208     if multimaster:
6209       msg = "dual-master"
6210     else:
6211       msg = "single-master"
6212     self.feedback_fn("* changing disks into %s mode" % msg)
6213     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6214                                            self.instance.disks,
6215                                            self.instance.name, multimaster)
6216     for node, nres in result.items():
6217       nres.Raise("Cannot change disks config on node %s" % node)
6218
6219   def _ExecCleanup(self):
6220     """Try to cleanup after a failed migration.
6221
6222     The cleanup is done by:
6223       - check that the instance is running only on one node
6224         (and update the config if needed)
6225       - change disks on its secondary node to secondary
6226       - wait until disks are fully synchronized
6227       - disconnect from the network
6228       - change disks into single-master mode
6229       - wait again until disks are fully synchronized
6230
6231     """
6232     instance = self.instance
6233     target_node = self.target_node
6234     source_node = self.source_node
6235
6236     # check running on only one node
6237     self.feedback_fn("* checking where the instance actually runs"
6238                      " (if this hangs, the hypervisor might be in"
6239                      " a bad state)")
6240     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6241     for node, result in ins_l.items():
6242       result.Raise("Can't contact node %s" % node)
6243
6244     runningon_source = instance.name in ins_l[source_node].payload
6245     runningon_target = instance.name in ins_l[target_node].payload
6246
6247     if runningon_source and runningon_target:
6248       raise errors.OpExecError("Instance seems to be running on two nodes,"
6249                                " or the hypervisor is confused. You will have"
6250                                " to ensure manually that it runs only on one"
6251                                " and restart this operation.")
6252
6253     if not (runningon_source or runningon_target):
6254       raise errors.OpExecError("Instance does not seem to be running at all."
6255                                " In this case, it's safer to repair by"
6256                                " running 'gnt-instance stop' to ensure disk"
6257                                " shutdown, and then restarting it.")
6258
6259     if runningon_target:
6260       # the migration has actually succeeded, we need to update the config
6261       self.feedback_fn("* instance running on secondary node (%s),"
6262                        " updating config" % target_node)
6263       instance.primary_node = target_node
6264       self.cfg.Update(instance, self.feedback_fn)
6265       demoted_node = source_node
6266     else:
6267       self.feedback_fn("* instance confirmed to be running on its"
6268                        " primary node (%s)" % source_node)
6269       demoted_node = target_node
6270
6271     self._EnsureSecondary(demoted_node)
6272     try:
6273       self._WaitUntilSync()
6274     except errors.OpExecError:
6275       # we ignore here errors, since if the device is standalone, it
6276       # won't be able to sync
6277       pass
6278     self._GoStandalone()
6279     self._GoReconnect(False)
6280     self._WaitUntilSync()
6281
6282     self.feedback_fn("* done")
6283
6284   def _RevertDiskStatus(self):
6285     """Try to revert the disk status after a failed migration.
6286
6287     """
6288     target_node = self.target_node
6289     try:
6290       self._EnsureSecondary(target_node)
6291       self._GoStandalone()
6292       self._GoReconnect(False)
6293       self._WaitUntilSync()
6294     except errors.OpExecError, err:
6295       self.lu.LogWarning("Migration failed and I can't reconnect the"
6296                          " drives: error '%s'\n"
6297                          "Please look and recover the instance status" %
6298                          str(err))
6299
6300   def _AbortMigration(self):
6301     """Call the hypervisor code to abort a started migration.
6302
6303     """
6304     instance = self.instance
6305     target_node = self.target_node
6306     migration_info = self.migration_info
6307
6308     abort_result = self.rpc.call_finalize_migration(target_node,
6309                                                     instance,
6310                                                     migration_info,
6311                                                     False)
6312     abort_msg = abort_result.fail_msg
6313     if abort_msg:
6314       logging.error("Aborting migration failed on target node %s: %s",
6315                     target_node, abort_msg)
6316       # Don't raise an exception here, as we stil have to try to revert the
6317       # disk status, even if this step failed.
6318
6319   def _ExecMigration(self):
6320     """Migrate an instance.
6321
6322     The migrate is done by:
6323       - change the disks into dual-master mode
6324       - wait until disks are fully synchronized again
6325       - migrate the instance
6326       - change disks on the new secondary node (the old primary) to secondary
6327       - wait until disks are fully synchronized
6328       - change disks into single-master mode
6329
6330     """
6331     instance = self.instance
6332     target_node = self.target_node
6333     source_node = self.source_node
6334
6335     self.feedback_fn("* checking disk consistency between source and target")
6336     for dev in instance.disks:
6337       if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6338         raise errors.OpExecError("Disk %s is degraded or not fully"
6339                                  " synchronized on target node,"
6340                                  " aborting migrate." % dev.iv_name)
6341
6342     # First get the migration information from the remote node
6343     result = self.rpc.call_migration_info(source_node, instance)
6344     msg = result.fail_msg
6345     if msg:
6346       log_err = ("Failed fetching source migration information from %s: %s" %
6347                  (source_node, msg))
6348       logging.error(log_err)
6349       raise errors.OpExecError(log_err)
6350
6351     self.migration_info = migration_info = result.payload
6352
6353     # Then switch the disks to master/master mode
6354     self._EnsureSecondary(target_node)
6355     self._GoStandalone()
6356     self._GoReconnect(True)
6357     self._WaitUntilSync()
6358
6359     self.feedback_fn("* preparing %s to accept the instance" % target_node)
6360     result = self.rpc.call_accept_instance(target_node,
6361                                            instance,
6362                                            migration_info,
6363                                            self.nodes_ip[target_node])
6364
6365     msg = result.fail_msg
6366     if msg:
6367       logging.error("Instance pre-migration failed, trying to revert"
6368                     " disk status: %s", msg)
6369       self.feedback_fn("Pre-migration failed, aborting")
6370       self._AbortMigration()
6371       self._RevertDiskStatus()
6372       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6373                                (instance.name, msg))
6374
6375     self.feedback_fn("* migrating instance to %s" % target_node)
6376     time.sleep(10)
6377     result = self.rpc.call_instance_migrate(source_node, instance,
6378                                             self.nodes_ip[target_node],
6379                                             self.live)
6380     msg = result.fail_msg
6381     if msg:
6382       logging.error("Instance migration failed, trying to revert"
6383                     " disk status: %s", msg)
6384       self.feedback_fn("Migration failed, aborting")
6385       self._AbortMigration()
6386       self._RevertDiskStatus()
6387       raise errors.OpExecError("Could not migrate instance %s: %s" %
6388                                (instance.name, msg))
6389     time.sleep(10)
6390
6391     instance.primary_node = target_node
6392     # distribute new instance config to the other nodes
6393     self.cfg.Update(instance, self.feedback_fn)
6394
6395     result = self.rpc.call_finalize_migration(target_node,
6396                                               instance,
6397                                               migration_info,
6398                                               True)
6399     msg = result.fail_msg
6400     if msg:
6401       logging.error("Instance migration succeeded, but finalization failed:"
6402                     " %s", msg)
6403       raise errors.OpExecError("Could not finalize instance migration: %s" %
6404                                msg)
6405
6406     self._EnsureSecondary(source_node)
6407     self._WaitUntilSync()
6408     self._GoStandalone()
6409     self._GoReconnect(False)
6410     self._WaitUntilSync()
6411
6412     self.feedback_fn("* done")
6413
6414   def Exec(self, feedback_fn):
6415     """Perform the migration.
6416
6417     """
6418     feedback_fn("Migrating instance %s" % self.instance.name)
6419
6420     self.feedback_fn = feedback_fn
6421
6422     self.source_node = self.instance.primary_node
6423     self.target_node = self.instance.secondary_nodes[0]
6424     self.all_nodes = [self.source_node, self.target_node]
6425     self.nodes_ip = {
6426       self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6427       self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6428       }
6429
6430     if self.cleanup:
6431       return self._ExecCleanup()
6432     else:
6433       return self._ExecMigration()
6434
6435
def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a device tree on a given node, children first.

  The children of the device are always visited recursively. The
  device itself is only created when creation is forced, either by
  the caller or because the device reports, through
  C{CreateOnSecondary()}, that it has to be created; in the latter
  case creation is forced for all of its children as well.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      is switched to True as soon as a device is found for which
      C{CreateOnSecondary()} returns a true value
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  must_create = force_create
  if device.CreateOnSecondary():
    must_create = True

  # children are handled before the device that is built on top of them
  for child in device.children or ():
    _CreateBlockDev(lu, node, instance, child, must_create,
                    info, force_open)

  if must_create:
    _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6476
6477
def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
  """Create exactly one block device on a given node.

  Children of the device are not touched, so they have to exist
  already when this function is called.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  lu.cfg.SetDiskID(device, node)
  creation = lu.rpc.call_blockdev_create(node, device, device.size,
                                         instance.name, force_open, info)
  failure_text = ("Can't create block device %s on"
                  " node %s for instance %s" %
                  (device, node, instance.name))
  creation.Raise(failure_text)

  # an already known physical ID is never overwritten
  if device.physical_id is not None:
    return
  device.physical_id = creation.payload
6506
6507
def _GenerateUniqueNames(lu, exts):
  """Generate one unique name per requested extension.

  For every entry of C{exts} a new unique ID is requested from the
  configuration (passing C{lu.proc.GetECId()}) and the entry is
  appended to that ID.

  @param lu: the logical unit on whose behalf we execute
  @param exts: the suffixes, one for each name to generate
  @rtype: list
  @return: the generated names, in the order of C{exts}

  """
  return ["%s%s" % (lu.cfg.GenerateUniqueID(lu.proc.GetECId()), suffix)
          for suffix in exts]
6519
6520
def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
                         p_minor, s_minor):
  """Build a drbd8 disk object together with its two LV children.

  A port and a shared secret are obtained from the configuration; the
  resulting device has a data volume of the requested size and a
  metadata volume of size 128 as children.

  @param lu: the logical unit on whose behalf we execute
  @param primary: first node of the drbd logical ID
  @param secondary: second node of the drbd logical ID
  @param size: size of the data volume and of the drbd device
  @param names: sequence holding the data LV name at index 0 and the
      metadata LV name at index 1
  @param iv_name: the C{iv_name} to set on the drbd device
  @param p_minor: minor stored for the primary in the logical ID
  @param s_minor: minor stored for the secondary in the logical ID
  @rtype: L{objects.Disk}
  @return: the drbd8 device

  """
  cfg = lu.cfg
  drbd_port = cfg.AllocatePort()
  vg = cfg.GetVGName()
  secret = cfg.GenerateDRBDSecret(lu.proc.GetECId())

  data_lv = objects.Disk(dev_type=constants.LD_LV, size=size,
                         logical_id=(vg, names[0]))
  meta_lv = objects.Disk(dev_type=constants.LD_LV, size=128,
                         logical_id=(vg, names[1]))

  drbd_id = (primary, secondary, drbd_port, p_minor, s_minor, secret)
  return objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                      logical_id=drbd_id,
                      children=[data_lv, meta_lv],
                      iv_name=iv_name)
6540
6541
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index):
  """Generate the disk objects for all disks of a given template.

  @param lu: the logical unit on whose behalf we execute
  @param template_name: the disk template (one of the C{constants.DT_*}
      values handled below)
  @param instance_name: name passed along when allocating DRBD minors
  @param primary_node: the primary node of the instance
  @param secondary_nodes: list of secondary nodes; must hold exactly
      one node for drbd8 and be empty for the plain and file templates
  @param disk_info: list of dicts with a "size" and a "mode" entry,
      one dict per disk
  @param file_storage_dir: directory holding file-based disks
  @param file_driver: driver stored in the logical ID of file disks
  @param base_index: index of the first generated disk; used for the
      names and for the C{disk/N} identifiers
  @rtype: list
  @return: the L{objects.Disk} objects, empty for diskless
  @raise errors.ProgrammerError: if the number of secondary nodes
      doesn't fit the template or the template is not known

  """
  #TODO: compute space requirements

  vg = lu.cfg.GetVGName()
  count = len(disk_info)

  if template_name == constants.DT_DISKLESS:
    return []

  if template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    lv_names = _GenerateUniqueNames(lu, [".disk%d" % num for num in
                                         range(base_index,
                                               base_index + count)])
    return [objects.Disk(dev_type=constants.LD_LV, size=spec["size"],
                         logical_id=(vg, lv_names[pos]),
                         iv_name="disk/%d" % (pos + base_index),
                         mode=spec["mode"])
            for pos, spec in enumerate(disk_info)]

  if template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    peer = secondary_nodes[0]
    # two minors per disk: one for the primary, one for the secondary
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, peer] * len(disk_info), instance_name)

    prefixes = _GenerateUniqueNames(lu, [".disk%d" % num for num in
                                         range(base_index,
                                               base_index + count)])
    # every disk gets a data and a metadata volume
    lv_pairs = [[prefix + "_data", prefix + "_meta"] for prefix in prefixes]

    drbd_disks = []
    for pos, spec in enumerate(disk_info):
      branch = _GenerateDRBD8Branch(lu, primary_node, peer,
                                    spec["size"], lv_pairs[pos],
                                    "disk/%d" % (pos + base_index),
                                    minors[2 * pos], minors[2 * pos + 1])
      branch.mode = spec["mode"]
      drbd_disks.append(branch)
    return drbd_disks

  if template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    _RequireFileStorage()

    file_disks = []
    for pos, spec in enumerate(disk_info):
      number = pos + base_index
      file_disks.append(objects.Disk(dev_type=constants.LD_FILE,
                                     size=spec["size"],
                                     iv_name="disk/%d" % number,
                                     logical_id=(file_driver,
                                                 "%s/disk%d" %
                                                 (file_storage_dir, number)),
                                     mode=spec["mode"]))
    return file_disks

  raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6608
6609
def _GetInstanceInfoText(instance):
  """Compute the text that should be added to the disk's metadata.

  @param instance: the instance owning the disk; only its name is used
  @rtype: string
  @return: the instance name prefixed with C{originstname+}

  """
  owner = instance.name
  return "originstname+%s" % owner
6615
6616
def _CalcEta(time_taken, written, total_size):
  """Estimate the time needed to write the remaining data.

  The average time spent per unit written so far is extrapolated
  linearly to the amount that is still missing.

  @param time_taken: The time taken so far
  @param written: amount written so far; used as a divisor, hence it
      must not be zero
  @param total_size: The total size of data to be written
  @return: The remaining time, in the same unit as C{time_taken}

  """
  time_per_unit = time_taken / float(written)
  remaining = total_size - written
  return remaining * time_per_unit
6628
6629
def _WipeDisks(lu, instance):
  """Wipes instance disks.

  Every disk of the instance is wiped on the primary node, one chunk
  per RPC call. Progress together with an estimate of the remaining
  time is reported through C{lu.LogInfo} after the first chunk and
  then at most once every 60 seconds.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe
  @return: None; a failed wipe call is reported by the C{Raise}
      method of the RPC result

  """
  node = instance.primary_node
  for idx, device in enumerate(instance.disks):
    lu.LogInfo("* Wiping disk %d", idx)
    logging.info("Wiping disk %d for instance %s", idx, instance.name)

    # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
    # MAX_WIPE_CHUNK at max
    wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                          constants.MIN_WIPE_CHUNK_PERCENT)
    # The percentage computation yields a float; it has to be turned into
    # an integer, otherwise fractional offsets and sizes would be handed to
    # the wipe call (and misreported by the "%d" formats below). A minimum
    # of one unit guarantees that the loop always makes progress.
    wipe_chunk_size = max(1, int(wipe_chunk_size))

    offset = 0
    size = device.size
    # zero makes the first progress message appear right after the
    # first chunk
    last_output = 0
    start_time = time.time()

    while offset < size:
      # the last chunk may be smaller than the regular chunk size
      wipe_size = min(wipe_chunk_size, size - offset)
      result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
      result.Raise("Could not wipe disk %d at offset %d for size %d" %
                   (idx, offset, wipe_size))
      now = time.time()
      offset += wipe_size
      if now - last_output >= 60:
        eta = _CalcEta(now - start_time, offset, size)
        lu.LogInfo(" - done: %.1f%% ETA: %s" %
                   (offset / float(size) * 100, utils.FormatSeconds(eta)))
        last_output = now
6667
6668
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance. For the file disk
  template the storage directory is created first. Afterwards every
  disk which is not skipped is created on all involved nodes; on the
  primary (or the target node, if given) creation and opening are
  forced.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of indices to skip
  @type target_node: string
  @param target_node: if passed, overrides the target node for creation
      and becomes the only node on which disks are created
  @return: None; failures are reported through exceptions raised by
      the called helpers and RPC results

  """
  meta = _GetInstanceInfoText(instance)
  if target_node is not None:
    primary = target_node
    nodes = [primary]
  else:
    primary = instance.primary_node
    nodes = instance.all_nodes

  if instance.disk_template == constants.DT_FILE:
    # all disks live in the directory of the first disk's file
    storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    dir_result = lu.rpc.call_file_storage_dir_create(primary, storage_dir)

    dir_result.Raise("Failed to create directory '%s' on"
                     " node %s" % (storage_dir, primary))

  # Note: this needs to be kept in sync with adding of disks in
  # LUSetInstanceParams
  for idx, device in enumerate(instance.disks):
    if not to_skip or idx not in to_skip:
      logging.info("Creating volume %s for instance %s",
                   device.iv_name, instance.name)
      #HARDCODE
      for node in nodes:
        on_primary = (node == primary)
        _CreateBlockDev(lu, node, instance, device, on_primary, meta,
                        on_primary)
6712
6713
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  For the file disk template the storage directory is removed as
  well, on the target node if one was given and on the primary node
  otherwise.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal; False as soon as one device or
      the storage directory could not be removed

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      # only the top-level device on the given node is removed
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        # best effort: warn and go on with the remaining devices
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    if target_node:
      tgt = target_node
    else:
      tgt = instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      # report the node the removal was actually attempted on, which is
      # not the primary node when a target node was given
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result
6761
6762
def _ComputeDiskSize(disk_template, disks):
  """Compute disk size requirements in the volume group

  @param disk_template: the disk template of the instance
  @param disks: list of dicts, each with a "size" entry
  @return: the required space, or None for the templates which need
      no space in the volume group (diskless and file)
  @raise errors.ProgrammerError: if the disk template is not known

  """
  # Required free disk space as a function of disk and swap space
  requirements = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: sum(d["size"] for d in disks),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
    constants.DT_FILE: None,
  }

  try:
    return requirements[disk_template]
  except KeyError:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" %  disk_template)
6781
6782
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation so that
  it can be used in both instance create and instance modify. The
  parameters are validated via RPC on all given nodes; results of
  nodes marked as offline are ignored.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  validation = lu.rpc.call_hypervisor_validate_params(nodenames,
                                                      hvname,
                                                      hvparams)
  for name in nodenames:
    node_result = validation[name]
    if not node_result.offline:
      node_result.Raise("Hypervisor parameter validation failed on node %s" %
                        name)
6808
6809
def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  The parameters are validated via RPC on the given nodes. A node
  whose result carries an empty payload only causes an informational
  message saying that the OS was not found and validation was skipped.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should use
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  checks = [constants.OS_VALIDATE_PARAMETERS]
  per_node = lu.rpc.call_os_validate(required, nodenames, osname,
                                     checks, osparams)
  for name, node_result in per_node.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    node_result.Raise("OS Parameters validation failed on node %s" % name)
    if node_result.payload:
      continue
    lu.LogInfo("OS %s not found on node %s, validation skipped",
               osname, name)
6837
6838
6839 class LUCreateInstance(LogicalUnit):
6840   """Create an instance.
6841
6842   """
6843   HPATH = "instance-add"
6844   HTYPE = constants.HTYPE_INSTANCE
6845   _OP_PARAMS = [
6846     _PInstanceName,
6847     ("mode", ht.NoDefault, ht.TElemOf(constants.INSTANCE_CREATE_MODES)),
6848     ("start", True, ht.TBool),
6849     ("wait_for_sync", True, ht.TBool),
6850     ("ip_check", True, ht.TBool),
6851     ("name_check", True, ht.TBool),
6852     ("disks", ht.NoDefault, ht.TListOf(ht.TDict)),
6853     ("nics", ht.NoDefault, ht.TListOf(ht.TDict)),
6854     ("hvparams", ht.EmptyDict, ht.TDict),
6855     ("beparams", ht.EmptyDict, ht.TDict),
6856     ("osparams", ht.EmptyDict, ht.TDict),
6857     ("no_install", None, ht.TMaybeBool),
6858     ("os_type", None, ht.TMaybeString),
6859     ("force_variant", False, ht.TBool),
6860     ("source_handshake", None, ht.TOr(ht.TList, ht.TNone)),
6861     ("source_x509_ca", None, ht.TMaybeString),
6862     ("source_instance_name", None, ht.TMaybeString),
6863     ("source_shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
6864      ht.TPositiveInt),
6865     ("src_node", None, ht.TMaybeString),
6866     ("src_path", None, ht.TMaybeString),
6867     ("pnode", None, ht.TMaybeString),
6868     ("snode", None, ht.TMaybeString),
6869     ("iallocator", None, ht.TMaybeString),
6870     ("hypervisor", None, ht.TMaybeString),
6871     ("disk_template", ht.NoDefault, _CheckDiskTemplate),
6872     ("identify_defaults", False, ht.TBool),
6873     ("file_driver", None, ht.TOr(ht.TNone, ht.TElemOf(constants.FILE_DRIVER))),
6874     ("file_storage_dir", None, ht.TMaybeString),
6875     ]
6876   REQ_BGL = False
6877
6878   def CheckArguments(self):
6879     """Check arguments.
6880
6881     """
6882     # do not require name_check to ease forward/backward compatibility
6883     # for tools
6884     if self.op.no_install and self.op.start:
6885       self.LogInfo("No-installation mode selected, disabling startup")
6886       self.op.start = False
6887     # validate/normalize the instance name
6888     self.op.instance_name = \
6889       netutils.Hostname.GetNormalizedName(self.op.instance_name)
6890
6891     if self.op.ip_check and not self.op.name_check:
6892       # TODO: make the ip check more flexible and not depend on the name check
6893       raise errors.OpPrereqError("Cannot do ip check without a name check",
6894                                  errors.ECODE_INVAL)
6895
6896     # check nics' parameter names
6897     for nic in self.op.nics:
6898       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6899
6900     # check disks. parameter names and consistent adopt/no-adopt strategy
6901     has_adopt = has_no_adopt = False
6902     for disk in self.op.disks:
6903       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6904       if "adopt" in disk:
6905         has_adopt = True
6906       else:
6907         has_no_adopt = True
6908     if has_adopt and has_no_adopt:
6909       raise errors.OpPrereqError("Either all disks are adopted or none is",
6910                                  errors.ECODE_INVAL)
6911     if has_adopt:
6912       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6913         raise errors.OpPrereqError("Disk adoption is not supported for the"
6914                                    " '%s' disk template" %
6915                                    self.op.disk_template,
6916                                    errors.ECODE_INVAL)
6917       if self.op.iallocator is not None:
6918         raise errors.OpPrereqError("Disk adoption not allowed with an"
6919                                    " iallocator script", errors.ECODE_INVAL)
6920       if self.op.mode == constants.INSTANCE_IMPORT:
6921         raise errors.OpPrereqError("Disk adoption not allowed for"
6922                                    " instance import", errors.ECODE_INVAL)
6923
6924     self.adopt_disks = has_adopt
6925
6926     # instance name verification
6927     if self.op.name_check:
6928       self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
6929       self.op.instance_name = self.hostname1.name
6930       # used in CheckPrereq for ip ping check
6931       self.check_ip = self.hostname1.ip
6932     else:
6933       self.check_ip = None
6934
6935     # file storage checks
6936     if (self.op.file_driver and
6937         not self.op.file_driver in constants.FILE_DRIVER):
6938       raise errors.OpPrereqError("Invalid file driver name '%s'" %
6939                                  self.op.file_driver, errors.ECODE_INVAL)
6940
6941     if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6942       raise errors.OpPrereqError("File storage directory path not absolute",
6943                                  errors.ECODE_INVAL)
6944
6945     ### Node/iallocator related checks
6946     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
6947
6948     if self.op.pnode is not None:
6949       if self.op.disk_template in constants.DTS_NET_MIRROR:
6950         if self.op.snode is None:
6951           raise errors.OpPrereqError("The networked disk templates need"
6952                                      " a mirror node", errors.ECODE_INVAL)
6953       elif self.op.snode:
6954         self.LogWarning("Secondary node will be ignored on non-mirrored disk"
6955                         " template")
6956         self.op.snode = None
6957
6958     self._cds = _GetClusterDomainSecret()
6959
6960     if self.op.mode == constants.INSTANCE_IMPORT:
6961       # On import force_variant must be True, because if we forced it at
6962       # initial install, our only chance when importing it back is that it
6963       # works again!
6964       self.op.force_variant = True
6965
6966       if self.op.no_install:
6967         self.LogInfo("No-installation mode has no effect during import")
6968
6969     elif self.op.mode == constants.INSTANCE_CREATE:
6970       if self.op.os_type is None:
6971         raise errors.OpPrereqError("No guest OS specified",
6972                                    errors.ECODE_INVAL)
6973       if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
6974         raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
6975                                    " installation" % self.op.os_type,
6976                                    errors.ECODE_STATE)
6977       if self.op.disk_template is None:
6978         raise errors.OpPrereqError("No disk template specified",
6979                                    errors.ECODE_INVAL)
6980
6981     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6982       # Check handshake to ensure both clusters have the same domain secret
6983       src_handshake = self.op.source_handshake
6984       if not src_handshake:
6985         raise errors.OpPrereqError("Missing source handshake",
6986                                    errors.ECODE_INVAL)
6987
6988       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6989                                                            src_handshake)
6990       if errmsg:
6991         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6992                                    errors.ECODE_INVAL)
6993
6994       # Load and check source CA
6995       self.source_x509_ca_pem = self.op.source_x509_ca
6996       if not self.source_x509_ca_pem:
6997         raise errors.OpPrereqError("Missing source X509 CA",
6998                                    errors.ECODE_INVAL)
6999
7000       try:
7001         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7002                                                     self._cds)
7003       except OpenSSL.crypto.Error, err:
7004         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7005                                    (err, ), errors.ECODE_INVAL)
7006
7007       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7008       if errcode is not None:
7009         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7010                                    errors.ECODE_INVAL)
7011
7012       self.source_x509_ca = cert
7013
7014       src_instance_name = self.op.source_instance_name
7015       if not src_instance_name:
7016         raise errors.OpPrereqError("Missing source instance name",
7017                                    errors.ECODE_INVAL)
7018
7019       self.source_instance_name = \
7020           netutils.GetHostname(name=src_instance_name).name
7021
7022     else:
7023       raise errors.OpPrereqError("Invalid instance creation mode %r" %
7024                                  self.op.mode, errors.ECODE_INVAL)
7025
7026   def ExpandNames(self):
7027     """ExpandNames for CreateInstance.
7028
7029     Figure out the right locks for instance creation.
7030
7031     """
7032     self.needed_locks = {}
7033
7034     instance_name = self.op.instance_name
7035     # this is just a preventive check, but someone might still add this
7036     # instance in the meantime, and creation will fail at lock-add time
7037     if instance_name in self.cfg.GetInstanceList():
7038       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7039                                  instance_name, errors.ECODE_EXISTS)
7040
7041     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7042
7043     if self.op.iallocator:
7044       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7045     else:
7046       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7047       nodelist = [self.op.pnode]
7048       if self.op.snode is not None:
7049         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7050         nodelist.append(self.op.snode)
7051       self.needed_locks[locking.LEVEL_NODE] = nodelist
7052
7053     # in case of import lock the source node too
7054     if self.op.mode == constants.INSTANCE_IMPORT:
7055       src_node = self.op.src_node
7056       src_path = self.op.src_path
7057
7058       if src_path is None:
7059         self.op.src_path = src_path = self.op.instance_name
7060
7061       if src_node is None:
7062         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7063         self.op.src_node = None
7064         if os.path.isabs(src_path):
7065           raise errors.OpPrereqError("Importing an instance from an absolute"
7066                                      " path requires a source node option.",
7067                                      errors.ECODE_INVAL)
7068       else:
7069         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7070         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7071           self.needed_locks[locking.LEVEL_NODE].append(src_node)
7072         if not os.path.isabs(src_path):
7073           self.op.src_path = src_path = \
7074             utils.PathJoin(constants.EXPORT_DIR, src_path)
7075
7076   def _RunAllocator(self):
7077     """Run the allocator based on input opcode.
7078
7079     """
7080     nics = [n.ToDict() for n in self.nics]
7081     ial = IAllocator(self.cfg, self.rpc,
7082                      mode=constants.IALLOCATOR_MODE_ALLOC,
7083                      name=self.op.instance_name,
7084                      disk_template=self.op.disk_template,
7085                      tags=[],
7086                      os=self.op.os_type,
7087                      vcpus=self.be_full[constants.BE_VCPUS],
7088                      mem_size=self.be_full[constants.BE_MEMORY],
7089                      disks=self.disks,
7090                      nics=nics,
7091                      hypervisor=self.op.hypervisor,
7092                      )
7093
7094     ial.Run(self.op.iallocator)
7095
7096     if not ial.success:
7097       raise errors.OpPrereqError("Can't compute nodes using"
7098                                  " iallocator '%s': %s" %
7099                                  (self.op.iallocator, ial.info),
7100                                  errors.ECODE_NORES)
7101     if len(ial.result) != ial.required_nodes:
7102       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7103                                  " of nodes (%s), required %s" %
7104                                  (self.op.iallocator, len(ial.result),
7105                                   ial.required_nodes), errors.ECODE_FAULT)
7106     self.op.pnode = ial.result[0]
7107     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7108                  self.op.instance_name, self.op.iallocator,
7109                  utils.CommaJoin(ial.result))
7110     if ial.required_nodes == 2:
7111       self.op.snode = ial.result[1]
7112
7113   def BuildHooksEnv(self):
7114     """Build hooks env.
7115
7116     This runs on master, primary and secondary nodes of the instance.
7117
7118     """
7119     env = {
7120       "ADD_MODE": self.op.mode,
7121       }
7122     if self.op.mode == constants.INSTANCE_IMPORT:
7123       env["SRC_NODE"] = self.op.src_node
7124       env["SRC_PATH"] = self.op.src_path
7125       env["SRC_IMAGES"] = self.src_images
7126
7127     env.update(_BuildInstanceHookEnv(
7128       name=self.op.instance_name,
7129       primary_node=self.op.pnode,
7130       secondary_nodes=self.secondaries,
7131       status=self.op.start,
7132       os_type=self.op.os_type,
7133       memory=self.be_full[constants.BE_MEMORY],
7134       vcpus=self.be_full[constants.BE_VCPUS],
7135       nics=_NICListToTuple(self, self.nics),
7136       disk_template=self.op.disk_template,
7137       disks=[(d["size"], d["mode"]) for d in self.disks],
7138       bep=self.be_full,
7139       hvp=self.hv_full,
7140       hypervisor_name=self.op.hypervisor,
7141     ))
7142
7143     nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
7144           self.secondaries)
7145     return env, nl, nl
7146
7147   def _ReadExportInfo(self):
7148     """Reads the export information from disk.
7149
7150     It will override the opcode source node and path with the actual
7151     information, if these two were not specified before.
7152
7153     @return: the export information
7154
7155     """
7156     assert self.op.mode == constants.INSTANCE_IMPORT
7157
7158     src_node = self.op.src_node
7159     src_path = self.op.src_path
7160
7161     if src_node is None:
7162       locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7163       exp_list = self.rpc.call_export_list(locked_nodes)
7164       found = False
7165       for node in exp_list:
7166         if exp_list[node].fail_msg:
7167           continue
7168         if src_path in exp_list[node].payload:
7169           found = True
7170           self.op.src_node = src_node = node
7171           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7172                                                        src_path)
7173           break
7174       if not found:
7175         raise errors.OpPrereqError("No export found for relative path %s" %
7176                                     src_path, errors.ECODE_INVAL)
7177
7178     _CheckNodeOnline(self, src_node)
7179     result = self.rpc.call_export_info(src_node, src_path)
7180     result.Raise("No export or invalid export found in dir %s" % src_path)
7181
7182     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7183     if not export_info.has_section(constants.INISECT_EXP):
7184       raise errors.ProgrammerError("Corrupted export config",
7185                                    errors.ECODE_ENVIRON)
7186
7187     ei_version = export_info.get(constants.INISECT_EXP, "version")
7188     if (int(ei_version) != constants.EXPORT_VERSION):
7189       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7190                                  (ei_version, constants.EXPORT_VERSION),
7191                                  errors.ECODE_ENVIRON)
7192     return export_info
7193
7194   def _ReadExportParams(self, einfo):
7195     """Use export parameters as defaults.
7196
7197     In case the opcode doesn't specify (as in override) some instance
7198     parameters, then try to use them from the export information, if
7199     that declares them.
7200
7201     """
7202     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7203
7204     if self.op.disk_template is None:
7205       if einfo.has_option(constants.INISECT_INS, "disk_template"):
7206         self.op.disk_template = einfo.get(constants.INISECT_INS,
7207                                           "disk_template")
7208       else:
7209         raise errors.OpPrereqError("No disk template specified and the export"
7210                                    " is missing the disk_template information",
7211                                    errors.ECODE_INVAL)
7212
7213     if not self.op.disks:
7214       if einfo.has_option(constants.INISECT_INS, "disk_count"):
7215         disks = []
7216         # TODO: import the disk iv_name too
7217         for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7218           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7219           disks.append({"size": disk_sz})
7220         self.op.disks = disks
7221       else:
7222         raise errors.OpPrereqError("No disk info specified and the export"
7223                                    " is missing the disk information",
7224                                    errors.ECODE_INVAL)
7225
7226     if (not self.op.nics and
7227         einfo.has_option(constants.INISECT_INS, "nic_count")):
7228       nics = []
7229       for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7230         ndict = {}
7231         for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7232           v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7233           ndict[name] = v
7234         nics.append(ndict)
7235       self.op.nics = nics
7236
7237     if (self.op.hypervisor is None and
7238         einfo.has_option(constants.INISECT_INS, "hypervisor")):
7239       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7240     if einfo.has_section(constants.INISECT_HYP):
7241       # use the export parameters but do not override the ones
7242       # specified by the user
7243       for name, value in einfo.items(constants.INISECT_HYP):
7244         if name not in self.op.hvparams:
7245           self.op.hvparams[name] = value
7246
7247     if einfo.has_section(constants.INISECT_BEP):
7248       # use the parameters, without overriding
7249       for name, value in einfo.items(constants.INISECT_BEP):
7250         if name not in self.op.beparams:
7251           self.op.beparams[name] = value
7252     else:
7253       # try to read the parameters old style, from the main section
7254       for name in constants.BES_PARAMETERS:
7255         if (name not in self.op.beparams and
7256             einfo.has_option(constants.INISECT_INS, name)):
7257           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7258
7259     if einfo.has_section(constants.INISECT_OSP):
7260       # use the parameters, without overriding
7261       for name, value in einfo.items(constants.INISECT_OSP):
7262         if name not in self.op.osparams:
7263           self.op.osparams[name] = value
7264
7265   def _RevertToDefaults(self, cluster):
7266     """Revert the instance parameters to the default values.
7267
7268     """
7269     # hvparams
7270     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7271     for name in self.op.hvparams.keys():
7272       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7273         del self.op.hvparams[name]
7274     # beparams
7275     be_defs = cluster.SimpleFillBE({})
7276     for name in self.op.beparams.keys():
7277       if name in be_defs and be_defs[name] == self.op.beparams[name]:
7278         del self.op.beparams[name]
7279     # nic params
7280     nic_defs = cluster.SimpleFillNIC({})
7281     for nic in self.op.nics:
7282       for name in constants.NICS_PARAMETERS:
7283         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7284           del nic[name]
7285     # osparams
7286     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7287     for name in self.op.osparams.keys():
7288       if name in os_defs and os_defs[name] == self.op.osparams[name]:
7289         del self.op.osparams[name]
7290
7291   def CheckPrereq(self):
7292     """Check prerequisites.
7293
7294     """
7295     if self.op.mode == constants.INSTANCE_IMPORT:
7296       export_info = self._ReadExportInfo()
7297       self._ReadExportParams(export_info)
7298
7299     _CheckDiskTemplate(self.op.disk_template)
7300
7301     if (not self.cfg.GetVGName() and
7302         self.op.disk_template not in constants.DTS_NOT_LVM):
7303       raise errors.OpPrereqError("Cluster does not support lvm-based"
7304                                  " instances", errors.ECODE_STATE)
7305
7306     if self.op.hypervisor is None:
7307       self.op.hypervisor = self.cfg.GetHypervisorType()
7308
7309     cluster = self.cfg.GetClusterInfo()
7310     enabled_hvs = cluster.enabled_hypervisors
7311     if self.op.hypervisor not in enabled_hvs:
7312       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7313                                  " cluster (%s)" % (self.op.hypervisor,
7314                                   ",".join(enabled_hvs)),
7315                                  errors.ECODE_STATE)
7316
7317     # check hypervisor parameter syntax (locally)
7318     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7319     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7320                                       self.op.hvparams)
7321     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7322     hv_type.CheckParameterSyntax(filled_hvp)
7323     self.hv_full = filled_hvp
7324     # check that we don't specify global parameters on an instance
7325     _CheckGlobalHvParams(self.op.hvparams)
7326
7327     # fill and remember the beparams dict
7328     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7329     self.be_full = cluster.SimpleFillBE(self.op.beparams)
7330
7331     # build os parameters
7332     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7333
7334     # now that hvp/bep are in final format, let's reset to defaults,
7335     # if told to do so
7336     if self.op.identify_defaults:
7337       self._RevertToDefaults(cluster)
7338
7339     # NIC buildup
7340     self.nics = []
7341     for idx, nic in enumerate(self.op.nics):
7342       nic_mode_req = nic.get("mode", None)
7343       nic_mode = nic_mode_req
7344       if nic_mode is None:
7345         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7346
7347       # in routed mode, for the first nic, the default ip is 'auto'
7348       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7349         default_ip_mode = constants.VALUE_AUTO
7350       else:
7351         default_ip_mode = constants.VALUE_NONE
7352
7353       # ip validity checks
7354       ip = nic.get("ip", default_ip_mode)
7355       if ip is None or ip.lower() == constants.VALUE_NONE:
7356         nic_ip = None
7357       elif ip.lower() == constants.VALUE_AUTO:
7358         if not self.op.name_check:
7359           raise errors.OpPrereqError("IP address set to auto but name checks"
7360                                      " have been skipped",
7361                                      errors.ECODE_INVAL)
7362         nic_ip = self.hostname1.ip
7363       else:
7364         if not netutils.IPAddress.IsValid(ip):
7365           raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7366                                      errors.ECODE_INVAL)
7367         nic_ip = ip
7368
7369       # TODO: check the ip address for uniqueness
7370       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7371         raise errors.OpPrereqError("Routed nic mode requires an ip address",
7372                                    errors.ECODE_INVAL)
7373
7374       # MAC address verification
7375       mac = nic.get("mac", constants.VALUE_AUTO)
7376       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7377         mac = utils.NormalizeAndValidateMac(mac)
7378
7379         try:
7380           self.cfg.ReserveMAC(mac, self.proc.GetECId())
7381         except errors.ReservationError:
7382           raise errors.OpPrereqError("MAC address %s already in use"
7383                                      " in cluster" % mac,
7384                                      errors.ECODE_NOTUNIQUE)
7385
7386       # bridge verification
7387       bridge = nic.get("bridge", None)
7388       link = nic.get("link", None)
7389       if bridge and link:
7390         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7391                                    " at the same time", errors.ECODE_INVAL)
7392       elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7393         raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7394                                    errors.ECODE_INVAL)
7395       elif bridge:
7396         link = bridge
7397
7398       nicparams = {}
7399       if nic_mode_req:
7400         nicparams[constants.NIC_MODE] = nic_mode_req
7401       if link:
7402         nicparams[constants.NIC_LINK] = link
7403
7404       check_params = cluster.SimpleFillNIC(nicparams)
7405       objects.NIC.CheckParameterSyntax(check_params)
7406       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7407
7408     # disk checks/pre-build
7409     self.disks = []
7410     for disk in self.op.disks:
7411       mode = disk.get("mode", constants.DISK_RDWR)
7412       if mode not in constants.DISK_ACCESS_SET:
7413         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7414                                    mode, errors.ECODE_INVAL)
7415       size = disk.get("size", None)
7416       if size is None:
7417         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7418       try:
7419         size = int(size)
7420       except (TypeError, ValueError):
7421         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7422                                    errors.ECODE_INVAL)
7423       new_disk = {"size": size, "mode": mode}
7424       if "adopt" in disk:
7425         new_disk["adopt"] = disk["adopt"]
7426       self.disks.append(new_disk)
7427
7428     if self.op.mode == constants.INSTANCE_IMPORT:
7429
7430       # Check that the new instance doesn't have less disks than the export
7431       instance_disks = len(self.disks)
7432       export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7433       if instance_disks < export_disks:
7434         raise errors.OpPrereqError("Not enough disks to import."
7435                                    " (instance: %d, export: %d)" %
7436                                    (instance_disks, export_disks),
7437                                    errors.ECODE_INVAL)
7438
7439       disk_images = []
7440       for idx in range(export_disks):
7441         option = 'disk%d_dump' % idx
7442         if export_info.has_option(constants.INISECT_INS, option):
7443           # FIXME: are the old os-es, disk sizes, etc. useful?
7444           export_name = export_info.get(constants.INISECT_INS, option)
7445           image = utils.PathJoin(self.op.src_path, export_name)
7446           disk_images.append(image)
7447         else:
7448           disk_images.append(False)
7449
7450       self.src_images = disk_images
7451
7452       old_name = export_info.get(constants.INISECT_INS, 'name')
7453       try:
7454         exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7455       except (TypeError, ValueError), err:
7456         raise errors.OpPrereqError("Invalid export file, nic_count is not"
7457                                    " an integer: %s" % str(err),
7458                                    errors.ECODE_STATE)
7459       if self.op.instance_name == old_name:
7460         for idx, nic in enumerate(self.nics):
7461           if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7462             nic_mac_ini = 'nic%d_mac' % idx
7463             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7464
7465     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7466
7467     # ip ping checks (we use the same ip that was resolved in ExpandNames)
7468     if self.op.ip_check:
7469       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7470         raise errors.OpPrereqError("IP %s of instance %s already in use" %
7471                                    (self.check_ip, self.op.instance_name),
7472                                    errors.ECODE_NOTUNIQUE)
7473
7474     #### mac address generation
7475     # By generating here the mac address both the allocator and the hooks get
7476     # the real final mac address rather than the 'auto' or 'generate' value.
7477     # There is a race condition between the generation and the instance object
7478     # creation, which means that we know the mac is valid now, but we're not
7479     # sure it will be when we actually add the instance. If things go bad
7480     # adding the instance will abort because of a duplicate mac, and the
7481     # creation job will fail.
7482     for nic in self.nics:
7483       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7484         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7485
7486     #### allocator run
7487
7488     if self.op.iallocator is not None:
7489       self._RunAllocator()
7490
7491     #### node related checks
7492
7493     # check primary node
7494     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7495     assert self.pnode is not None, \
7496       "Cannot retrieve locked node %s" % self.op.pnode
7497     if pnode.offline:
7498       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7499                                  pnode.name, errors.ECODE_STATE)
7500     if pnode.drained:
7501       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7502                                  pnode.name, errors.ECODE_STATE)
7503     if not pnode.vm_capable:
7504       raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
7505                                  " '%s'" % pnode.name, errors.ECODE_STATE)
7506
7507     self.secondaries = []
7508
7509     # mirror node verification
7510     if self.op.disk_template in constants.DTS_NET_MIRROR:
7511       if self.op.snode == pnode.name:
7512         raise errors.OpPrereqError("The secondary node cannot be the"
7513                                    " primary node.", errors.ECODE_INVAL)
7514       _CheckNodeOnline(self, self.op.snode)
7515       _CheckNodeNotDrained(self, self.op.snode)
7516       _CheckNodeVmCapable(self, self.op.snode)
7517       self.secondaries.append(self.op.snode)
7518
7519     nodenames = [pnode.name] + self.secondaries
7520
7521     req_size = _ComputeDiskSize(self.op.disk_template,
7522                                 self.disks)
7523
7524     # Check lv size requirements, if not adopting
7525     if req_size is not None and not self.adopt_disks:
7526       _CheckNodesFreeDisk(self, nodenames, req_size)
7527
7528     if self.adopt_disks: # instead, we must check the adoption data
7529       all_lvs = set([i["adopt"] for i in self.disks])
7530       if len(all_lvs) != len(self.disks):
7531         raise errors.OpPrereqError("Duplicate volume names given for adoption",
7532                                    errors.ECODE_INVAL)
7533       for lv_name in all_lvs:
7534         try:
7535           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7536         except errors.ReservationError:
7537           raise errors.OpPrereqError("LV named %s used by another instance" %
7538                                      lv_name, errors.ECODE_NOTUNIQUE)
7539
7540       node_lvs = self.rpc.call_lv_list([pnode.name],
7541                                        self.cfg.GetVGName())[pnode.name]
7542       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7543       node_lvs = node_lvs.payload
7544       delta = all_lvs.difference(node_lvs.keys())
7545       if delta:
7546         raise errors.OpPrereqError("Missing logical volume(s): %s" %
7547                                    utils.CommaJoin(delta),
7548                                    errors.ECODE_INVAL)
7549       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7550       if online_lvs:
7551         raise errors.OpPrereqError("Online logical volumes found, cannot"
7552                                    " adopt: %s" % utils.CommaJoin(online_lvs),
7553                                    errors.ECODE_STATE)
7554       # update the size of disk based on what is found
7555       for dsk in self.disks:
7556         dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
7557
7558     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7559
7560     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7561     # check OS parameters (remotely)
7562     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7563
7564     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7565
7566     # memory check on primary node
7567     if self.op.start:
7568       _CheckNodeFreeMemory(self, self.pnode.name,
7569                            "creating instance %s" % self.op.instance_name,
7570                            self.be_full[constants.BE_MEMORY],
7571                            self.op.hypervisor)
7572
7573     self.dry_run_result = list(nodenames)
7574
  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    The steps are: generate the disk objects, create the disks (or
    rename the logical volumes being adopted), optionally wipe them, add
    the instance to the configuration, release the node locks, wait for
    or check the disk sync, run the OS creation or the data import and,
    if requested, start the instance.

    @param feedback_fn: callable used for reporting progress messages
    @return: the list of the nodes of the instance (C{iobj.all_nodes})
    @raise errors.OpExecError: if the disks are degraded after creation
    @raise errors.ProgrammerError: if the opcode mode is not a known one

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    # a network port is allocated only for the hypervisors requiring one
    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    if constants.ENABLE_FILE_STORAGE:
      # this is needed because os.path.join does not accept None arguments
      if self.op.file_storage_dir is None:
        string_file_storage_dir = ""
      else:
        string_file_storage_dir = self.op.file_storage_dir

      # build the full file storage dir path
      file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
                                        string_file_storage_dir, instance)
    else:
      file_storage_dir = ""

    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0)

    # the instance is created as administratively down; admin_up is only
    # set right before starting it, at the end of this method
    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )

    if self.adopt_disks:
      # rename LVs to the newly-generated names; we need to construct
      # 'fake' LV disks with the old data, plus the new unique_id
      tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
      rename_to = []
      for t_dsk, a_dsk in zip (tmp_disks, self.disks):
        # remember the generated name, then point the copy at the
        # existing (to be adopted) volume
        rename_to.append(t_dsk.logical_id)
        t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
        self.cfg.SetDiskID(t_dsk, pnode_name)
      result = self.rpc.call_blockdev_rename(pnode_name,
                                             zip(tmp_disks, rename_to))
      result.Raise("Failed to rename adoped LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        # roll back: remove what was created and give back the DRBD
        # minors, then let the error propagate
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          self.cfg.ReleaseDRBDMinors(instance)
          raise

      if self.cfg.GetClusterInfo().prealloc_wipe_disks:
        feedback_fn("* wiping instance disks...")
        try:
          _WipeDisks(self, iobj)
        except errors.OpExecError:
          # same rollback as for a failed disk creation
          self.LogWarning("Device wiping failed, reverting...")
          try:
            _RemoveDisks(self, iobj)
          finally:
            self.cfg.ReleaseDRBDMinors(instance)
            raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]
    # Unlock all the nodes
    if self.op.mode == constants.INSTANCE_IMPORT:
      # for imports the lock on the source node is kept (presumably
      # because the disk transfer below still reads from that node)
      nodes_keep = [self.op.src_node]
      nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
                       if node != self.op.src_node]
      self.context.glm.release(locking.LEVEL_NODE, nodes_release)
      self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
    else:
      self.context.glm.release(locking.LEVEL_NODE)
      del self.acquired_locks[locking.LEVEL_NODE]

    if self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_NET_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      # undo the creation: remove the disks and the config entry
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    # diskless instances and adopted disks get no OS installation/import
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
                                                 self.op.debug_level)
          result.Raise("Could not add os for instance %s"
                       " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")

        transfers = []

        for idx, image in enumerate(self.src_images):
          # disks without a dump in the export are marked with False
          if not image:
            continue

          # FIXME: pass debug option from opcode to backend
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                             constants.IEIO_FILE, (image, ),
                                             constants.IEIO_SCRIPT,
                                             (iobj.disks[idx], idx),
                                             None)
          transfers.append(dt)

        import_result = \
          masterd.instance.TransferInstanceData(self, feedback_fn,
                                                self.op.src_node, pnode_name,
                                                self.pnode.secondary_ip,
                                                iobj, transfers)
        # a partially failed import is only warned about, not fatal
        if not compat.all(import_result):
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
        feedback_fn("* preparing remote import...")
        # The source cluster will stop the instance before attempting to make a
        # connection. In some cases stopping an instance can take a long time,
        # hence the shutdown timeout is added to the connection timeout.
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
                           self.op.source_shutdown_timeout)
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

        disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
                                                     self.source_x509_ca,
                                                     self._cds, timeouts)
        if not compat.all(disk_results):
          # TODO: Should the instance still be started, even if some disks
          # failed to import (valid for local imports, too)?
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

        # Run rename script on newly imported instance
        assert iobj.name == instance
        feedback_fn("Running rename script for %s" % instance)
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                   self.source_instance_name,
                                                   self.op.debug_level)
        # a failed rename script is only warned about, not fatal
        if result.fail_msg:
          self.LogWarning("Failed to run rename script for %s on node"
                          " %s: %s" % (instance, pnode_name, result.fail_msg))

      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      # record the new admin state in the configuration before starting
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)
7772
7773
class LUConnectConsole(NoHooksLU):
  """Compute the command for reaching an instance's console.

  This logical unit is somewhat special: instead of executing an action
  it returns the command line which has to be run on the master node in
  order to connect to the console.

  """
  REQ_BGL = False
  _OP_PARAMS = [
    _PInstanceName
    ]

  def ExpandNames(self):
    """Expand the instance name and declare the lock on the instance.

    """
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    This looks up the instance in the configuration (stored in
    C{self.instance}) and checks that its primary node is online.

    """
    inst_name = self.op.instance_name
    self.instance = self.cfg.GetInstanceInfo(inst_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % inst_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Build the command line for connecting to the console.

    @param feedback_fn: not used by this logical unit
    @return: the value returned by C{self.ssh.BuildCmd} for the console
        command on the primary node
    @raise errors.OpExecError: if the instance is not among the ones
        reported by its primary node

    """
    inst = self.instance
    pnode = inst.primary_node

    running = self.rpc.call_instance_list([pnode], [inst.hypervisor])[pnode]
    running.Raise("Can't get node information from %s" % pnode)

    if inst.name not in running.payload:
      # tell apart an instance which should be up from a stopped one
      state = "ADMIN_down"
      if inst.admin_up:
        state = "ERROR_down"
      raise errors.OpExecError("Instance %s is not running (state %s)" %
                               (inst.name, state))

    logging.debug("Connecting to console of %s on %s", inst.name, pnode)

    hv_class = hypervisor.GetHypervisor(inst.hypervisor)
    cluster = self.cfg.GetClusterInfo()
    # beparams and hvparams are passed separately, to avoid editing the
    # instance and then saving the defaults in the instance itself.
    filled_hv = cluster.FillHV(inst)
    filled_be = cluster.FillBE(inst)
    console_cmd = hv_class.GetShellCommandForConsole(inst, filled_hv,
                                                     filled_be)

    # build ssh cmdline
    return self.ssh.BuildCmd(pnode, "root", console_cmd,
                             batch=True, tty=True)
7832
7833
class LUReplaceDisks(LogicalUnit):
  """Logical unit replacing the disks of an instance.

  Argument validation and the actual replacement are delegated to
  L{TLReplaceDisks}; this class declares the locks and builds the hooks
  environment.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("mode", ht.NoDefault, ht.TElemOf(constants.REPLACE_MODES)),
    ("disks", ht.EmptyList, ht.TListOf(ht.TPositiveInt)),
    ("remote_node", None, ht.TMaybeString),
    ("iallocator", None, ht.TMaybeString),
    ("early_release", False, ht.TBool),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    """Validate the mode/remote node/iallocator combination.

    """
    op = self.op
    TLReplaceDisks.CheckArguments(op.mode, op.remote_node, op.iallocator)

  def ExpandNames(self):
    """Declare the needed locks and create the replacement tasklet.

    With an iallocator all nodes are locked; with an explicit new
    secondary that node is locked and the instance's nodes are appended
    later; otherwise only the instance's nodes are locked.

    """
    self._ExpandAndLockInstance()

    op = self.op
    node_level = locking.LEVEL_NODE

    if op.iallocator is not None:
      self.needed_locks[node_level] = locking.ALL_SET

    elif op.remote_node is not None:
      new_secondary = _ExpandNodeName(self.cfg, op.remote_node)
      op.remote_node = new_secondary

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[node_level] = [new_secondary]
      self.recalculate_locks[node_level] = constants.LOCKS_APPEND

    else:
      self.needed_locks[node_level] = []
      self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

    self.replacer = TLReplaceDisks(self, op.instance_name, op.mode,
                                   op.iallocator, op.remote_node, op.disks,
                                   False, op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    """Lock the instance's nodes unless all nodes are already requested.

    @param level: the locking level being declared

    """
    if level != locking.LEVEL_NODE:
      return

    if self.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET:
      # every node is going to be locked anyway
      return

    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The node list holds the master node, the instance's primary node and,
    when given, the new secondary node.

    @return: tuple of (environment, node list, node list); the same list
        object is returned in both node positions

    """
    inst = self.replacer.instance
    new_secondary = self.op.remote_node

    env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": new_secondary,
      "OLD_SECONDARY": inst.secondary_nodes[0],
      }
    env.update(_BuildInstanceHookEnvByObject(self, inst))

    hook_nodes = [self.cfg.GetMasterNode(), inst.primary_node]
    if new_secondary is not None:
      hook_nodes.append(new_secondary)

    return env, hook_nodes, hook_nodes
7908
7909
7910 class TLReplaceDisks(Tasklet):
7911   """Replaces disks for an instance.
7912
7913   Note: Locking is not within the scope of this class.
7914
7915   """
  def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
               disks, delay_iallocator, early_release):
    """Initializes this class.

    @param lu: the logical unit this tasklet belongs to, handed over to
        C{Tasklet.__init__}
    @param instance_name: name of the instance whose disks are replaced
    @param mode: the replace mode; compared against the
        C{constants.REPLACE_DISK_*} values in L{_CheckPrereq2}
    @param iallocator_name: name of the iallocator used to pick the new
        secondary, or None
    @param remote_node: name of the new secondary node, or None
    @param disks: indices of the disks to replace; an empty value means
        all disks in the non-automatic modes
    @param delay_iallocator: if true, L{_CheckPrereq2} is run from L{Exec}
        instead of from L{CheckPrereq}
    @param early_release: if true, the old storage is removed and the node
        locks are released before waiting for the disks to sync

    """
    Tasklet.__init__(self, lu)

    # Parameters
    self.instance_name = instance_name
    self.mode = mode
    self.iallocator_name = iallocator_name
    self.remote_node = remote_node
    self.disks = disks
    self.delay_iallocator = delay_iallocator
    self.early_release = early_release

    # Runtime data
    # (instance is filled in by CheckPrereq, the remaining ones by
    # _CheckPrereq2)
    self.instance = None
    self.new_node = None
    self.target_node = None
    self.other_node = None
    self.remote_node_info = None
    self.node_secondary_ip = None
7939
7940   @staticmethod
7941   def CheckArguments(mode, remote_node, iallocator):
7942     """Helper function for users of this class.
7943
7944     """
7945     # check for valid parameter combination
7946     if mode == constants.REPLACE_DISK_CHG:
7947       if remote_node is None and iallocator is None:
7948         raise errors.OpPrereqError("When changing the secondary either an"
7949                                    " iallocator script must be used or the"
7950                                    " new node given", errors.ECODE_INVAL)
7951
7952       if remote_node is not None and iallocator is not None:
7953         raise errors.OpPrereqError("Give either the iallocator or the new"
7954                                    " secondary, not both", errors.ECODE_INVAL)
7955
7956     elif remote_node is not None or iallocator is not None:
7957       # Not replacing the secondary
7958       raise errors.OpPrereqError("The iallocator and new node options can"
7959                                  " only be used when changing the"
7960                                  " secondary node", errors.ECODE_INVAL)
7961
7962   @staticmethod
7963   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7964     """Compute a new secondary node using an IAllocator.
7965
7966     """
7967     ial = IAllocator(lu.cfg, lu.rpc,
7968                      mode=constants.IALLOCATOR_MODE_RELOC,
7969                      name=instance_name,
7970                      relocate_from=relocate_from)
7971
7972     ial.Run(iallocator_name)
7973
7974     if not ial.success:
7975       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7976                                  " %s" % (iallocator_name, ial.info),
7977                                  errors.ECODE_NORES)
7978
7979     if len(ial.result) != ial.required_nodes:
7980       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7981                                  " of nodes (%s), required %s" %
7982                                  (iallocator_name,
7983                                   len(ial.result), ial.required_nodes),
7984                                  errors.ECODE_FAULT)
7985
7986     remote_node_name = ial.result[0]
7987
7988     lu.LogInfo("Selected new secondary for instance '%s': %s",
7989                instance_name, remote_node_name)
7990
7991     return remote_node_name
7992
  def _FindFaultyDisks(self, node_name):
    """Look up the faulty disks of this instance on one node.

    Thin wrapper around C{_FindFaultyInstanceDisks}; the trailing C{True}
    is passed positionally and its meaning is defined by that helper.

    @param node_name: name of the node to check
    @return: whatever C{_FindFaultyInstanceDisks} returns; L{_CheckPrereq2}
        uses it as the list of disks to replace

    """
    return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
                                    node_name, True)
7996
7997   def CheckPrereq(self):
7998     """Check prerequisites.
7999
8000     This checks that the instance is in the cluster.
8001
8002     """
8003     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
8004     assert instance is not None, \
8005       "Cannot retrieve locked instance %s" % self.instance_name
8006
8007     if instance.disk_template != constants.DT_DRBD8:
8008       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
8009                                  " instances", errors.ECODE_INVAL)
8010
8011     if len(instance.secondary_nodes) != 1:
8012       raise errors.OpPrereqError("The instance has a strange layout,"
8013                                  " expected one secondary but found %d" %
8014                                  len(instance.secondary_nodes),
8015                                  errors.ECODE_FAULT)
8016
8017     if not self.delay_iallocator:
8018       self._CheckPrereq2()
8019
  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated
    because during node evacuation iallocator was only called with an
    unmodified cluster model, not taking planned changes into account; it
    is therefore called from L{CheckPrereq} normally, and from L{Exec} when
    C{delay_iallocator} is set.

    It determines the new secondary (running the iallocator if one was
    given), validates it, and fills in C{self.disks}, C{self.target_node},
    C{self.other_node}, C{self.new_node}, C{self.remote_node_info} and
    C{self.node_secondary_ip} according to the replace mode.

    @raise errors.OpPrereqError: if the new node is the instance's primary
        or current secondary node, if disks were given for a mode which
        does not accept them, or if the automatic mode finds faulty disks
        on both nodes
    @raise errors.ProgrammerError: if the replace mode is not handled

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    # The new secondary is either given explicitly or computed by the
    # iallocator; it stays None when neither was specified
    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is not None:
      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node
    else:
      self.remote_node_info = None

    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance.", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance.",
                                 errors.ECODE_INVAL)

    # In these two modes the set of disks is computed below (faulty ones,
    # or all of them), so an explicit list is rejected
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      # Automatic mode: replace whatever is faulty, on whichever single
      # node has faulty disks
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        # Nothing is faulty; Exec reports that no disks need replacement
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        # The old secondary is deliberately not part of the nodes which
        # must be online; its offline state is handled just below
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)
        _CheckNodeVmCapable(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # Check whether disks are valid
    # (only the lookup itself matters, the returned value is discarded)
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    node_2nd_ip = {}

    # Nodes which play no role in the selected mode are still None here
    for node_name in [self.target_node, self.other_node, self.new_node]:
      if node_name is not None:
        node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip

    self.node_secondary_ip = node_2nd_ip
8135
8136   def Exec(self, feedback_fn):
8137     """Execute disk replacement.
8138
8139     This dispatches the disk replacement to the appropriate handler.
8140
8141     """
8142     if self.delay_iallocator:
8143       self._CheckPrereq2()
8144
8145     if not self.disks:
8146       feedback_fn("No disks need replacement")
8147       return
8148
8149     feedback_fn("Replacing disk(s) %s for %s" %
8150                 (utils.CommaJoin(self.disks), self.instance.name))
8151
8152     activate_disks = (not self.instance.admin_up)
8153
8154     # Activate the instance disks if we're replacing them on a down instance
8155     if activate_disks:
8156       _StartInstanceDisks(self.lu, self.instance, True)
8157
8158     try:
8159       # Should we replace the secondary node?
8160       if self.new_node is not None:
8161         fn = self._ExecDrbd8Secondary
8162       else:
8163         fn = self._ExecDrbd8DiskOnly
8164
8165       return fn(feedback_fn)
8166
8167     finally:
8168       # Deactivate the instance disks if we're replacing them on a
8169       # down instance
8170       if activate_disks:
8171         _SafeShutdownInstanceDisks(self.lu, self.instance)
8172
8173   def _CheckVolumeGroup(self, nodes):
8174     self.lu.LogInfo("Checking volume groups")
8175
8176     vgname = self.cfg.GetVGName()
8177
8178     # Make sure volume group exists on all involved nodes
8179     results = self.rpc.call_vg_list(nodes)
8180     if not results:
8181       raise errors.OpExecError("Can't list volume groups on the nodes")
8182
8183     for node in nodes:
8184       res = results[node]
8185       res.Raise("Error checking node %s" % node)
8186       if vgname not in res.payload:
8187         raise errors.OpExecError("Volume group '%s' not found on node %s" %
8188                                  (vgname, node))
8189
8190   def _CheckDisksExistence(self, nodes):
8191     # Check disk existence
8192     for idx, dev in enumerate(self.instance.disks):
8193       if idx not in self.disks:
8194         continue
8195
8196       for node in nodes:
8197         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8198         self.cfg.SetDiskID(dev, node)
8199
8200         result = self.rpc.call_blockdev_find(node, dev)
8201
8202         msg = result.fail_msg
8203         if msg or not result.payload:
8204           if not msg:
8205             msg = "disk not found"
8206           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8207                                    (idx, node, msg))
8208
8209   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8210     for idx, dev in enumerate(self.instance.disks):
8211       if idx not in self.disks:
8212         continue
8213
8214       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8215                       (idx, node_name))
8216
8217       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8218                                    ldisk=ldisk):
8219         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8220                                  " replace disks for instance %s" %
8221                                  (node_name, self.instance.name))
8222
8223   def _CreateNewStorage(self, node_name):
8224     vgname = self.cfg.GetVGName()
8225     iv_names = {}
8226
8227     for idx, dev in enumerate(self.instance.disks):
8228       if idx not in self.disks:
8229         continue
8230
8231       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8232
8233       self.cfg.SetDiskID(dev, node_name)
8234
8235       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8236       names = _GenerateUniqueNames(self.lu, lv_names)
8237
8238       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8239                              logical_id=(vgname, names[0]))
8240       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8241                              logical_id=(vgname, names[1]))
8242
8243       new_lvs = [lv_data, lv_meta]
8244       old_lvs = dev.children
8245       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
8246
8247       # we pass force_create=True to force the LVM creation
8248       for new_lv in new_lvs:
8249         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8250                         _GetInstanceInfoText(self.instance), False)
8251
8252     return iv_names
8253
8254   def _CheckDevices(self, node_name, iv_names):
8255     for name, (dev, _, _) in iv_names.iteritems():
8256       self.cfg.SetDiskID(dev, node_name)
8257
8258       result = self.rpc.call_blockdev_find(node_name, dev)
8259
8260       msg = result.fail_msg
8261       if msg or not result.payload:
8262         if not msg:
8263           msg = "disk not found"
8264         raise errors.OpExecError("Can't find DRBD device %s: %s" %
8265                                  (name, msg))
8266
8267       if result.payload.is_degraded:
8268         raise errors.OpExecError("DRBD device %s is degraded!" % name)
8269
8270   def _RemoveOldStorage(self, node_name, iv_names):
8271     for name, (_, old_lvs, _) in iv_names.iteritems():
8272       self.lu.LogInfo("Remove logical volumes for %s" % name)
8273
8274       for lv in old_lvs:
8275         self.cfg.SetDiskID(lv, node_name)
8276
8277         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8278         if msg:
8279           self.lu.LogWarning("Can't remove old LV: %s" % msg,
8280                              hint="remove unused LVs manually")
8281
  def _ReleaseNodeLock(self, node_name):
    """Releases the node-level lock(s) named by C{node_name}.

    @param node_name: forwarded unchanged to the lock manager's C{release};
        the callers in this class pass a list of node names

    """
    self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
8285
8286   def _ExecDrbd8DiskOnly(self, feedback_fn):
8287     """Replace a disk on the primary or secondary for DRBD 8.
8288
8289     The algorithm for replace is quite complicated:
8290
8291       1. for each disk to be replaced:
8292
8293         1. create new LVs on the target node with unique names
8294         1. detach old LVs from the drbd device
8295         1. rename old LVs to name_replaced.<time_t>
8296         1. rename new LVs to old LVs
8297         1. attach the new LVs (with the old names now) to the drbd device
8298
8299       1. wait for sync across all devices
8300
8301       1. for each modified disk:
8302
8303         1. remove old LVs (which have the name name_replaces.<time_t>)
8304
8305     Failures are not very well handled.
8306
8307     """
8308     steps_total = 6
8309
8310     # Step: check device activation
8311     self.lu.LogStep(1, steps_total, "Check device existence")
8312     self._CheckDisksExistence([self.other_node, self.target_node])
8313     self._CheckVolumeGroup([self.target_node, self.other_node])
8314
8315     # Step: check other node consistency
8316     self.lu.LogStep(2, steps_total, "Check peer consistency")
8317     self._CheckDisksConsistency(self.other_node,
8318                                 self.other_node == self.instance.primary_node,
8319                                 False)
8320
8321     # Step: create new storage
8322     self.lu.LogStep(3, steps_total, "Allocate new storage")
8323     iv_names = self._CreateNewStorage(self.target_node)
8324
8325     # Step: for each lv, detach+rename*2+attach
8326     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8327     for dev, old_lvs, new_lvs in iv_names.itervalues():
8328       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
8329
8330       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
8331                                                      old_lvs)
8332       result.Raise("Can't detach drbd from local storage on node"
8333                    " %s for device %s" % (self.target_node, dev.iv_name))
8334       #dev.children = []
8335       #cfg.Update(instance)
8336
8337       # ok, we created the new LVs, so now we know we have the needed
8338       # storage; as such, we proceed on the target node to rename
8339       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
8340       # using the assumption that logical_id == physical_id (which in
8341       # turn is the unique_id on that node)
8342
8343       # FIXME(iustin): use a better name for the replaced LVs
8344       temp_suffix = int(time.time())
8345       ren_fn = lambda d, suff: (d.physical_id[0],
8346                                 d.physical_id[1] + "_replaced-%s" % suff)
8347
8348       # Build the rename list based on what LVs exist on the node
8349       rename_old_to_new = []
8350       for to_ren in old_lvs:
8351         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
8352         if not result.fail_msg and result.payload:
8353           # device exists
8354           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
8355
8356       self.lu.LogInfo("Renaming the old LVs on the target node")
8357       result = self.rpc.call_blockdev_rename(self.target_node,
8358                                              rename_old_to_new)
8359       result.Raise("Can't rename old LVs on node %s" % self.target_node)
8360
8361       # Now we rename the new LVs to the old LVs
8362       self.lu.LogInfo("Renaming the new LVs on the target node")
8363       rename_new_to_old = [(new, old.physical_id)
8364                            for old, new in zip(old_lvs, new_lvs)]
8365       result = self.rpc.call_blockdev_rename(self.target_node,
8366                                              rename_new_to_old)
8367       result.Raise("Can't rename new LVs on node %s" % self.target_node)
8368
8369       for old, new in zip(old_lvs, new_lvs):
8370         new.logical_id = old.logical_id
8371         self.cfg.SetDiskID(new, self.target_node)
8372
8373       for disk in old_lvs:
8374         disk.logical_id = ren_fn(disk, temp_suffix)
8375         self.cfg.SetDiskID(disk, self.target_node)
8376
8377       # Now that the new lvs have the old name, we can add them to the device
8378       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
8379       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
8380                                                   new_lvs)
8381       msg = result.fail_msg
8382       if msg:
8383         for new_lv in new_lvs:
8384           msg2 = self.rpc.call_blockdev_remove(self.target_node,
8385                                                new_lv).fail_msg
8386           if msg2:
8387             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
8388                                hint=("cleanup manually the unused logical"
8389                                      "volumes"))
8390         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
8391
8392       dev.children = new_lvs
8393
8394       self.cfg.Update(self.instance, feedback_fn)
8395
8396     cstep = 5
8397     if self.early_release:
8398       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8399       cstep += 1
8400       self._RemoveOldStorage(self.target_node, iv_names)
8401       # WARNING: we release both node locks here, do not do other RPCs
8402       # than WaitForSync to the primary node
8403       self._ReleaseNodeLock([self.target_node, self.other_node])
8404
8405     # Wait for sync
8406     # This can fail as the old devices are degraded and _WaitForSync
8407     # does a combined result over all disks, so we don't check its return value
8408     self.lu.LogStep(cstep, steps_total, "Sync devices")
8409     cstep += 1
8410     _WaitForSync(self.lu, self.instance)
8411
8412     # Check all devices manually
8413     self._CheckDevices(self.instance.primary_node, iv_names)
8414
8415     # Step: remove old storage
8416     if not self.early_release:
8417       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8418       cstep += 1
8419       self._RemoveOldStorage(self.target_node, iv_names)
8420
8421   def _ExecDrbd8Secondary(self, feedback_fn):
8422     """Replace the secondary node for DRBD 8.
8423
8424     The algorithm for replace is quite complicated:
8425       - for all disks of the instance:
8426         - create new LVs on the new node with same names
8427         - shutdown the drbd device on the old secondary
8428         - disconnect the drbd network on the primary
8429         - create the drbd device on the new secondary
8430         - network attach the drbd on the primary, using an artifice:
8431           the drbd code for Attach() will connect to the network if it
8432           finds a device which is connected to the good local disks but
8433           not network enabled
8434       - wait for sync across all devices
8435       - remove all disks from the old secondary
8436
8437     Failures are not very well handled.
8438
8439     """
8440     steps_total = 6
8441
8442     # Step: check device activation
8443     self.lu.LogStep(1, steps_total, "Check device existence")
8444     self._CheckDisksExistence([self.instance.primary_node])
8445     self._CheckVolumeGroup([self.instance.primary_node])
8446
8447     # Step: check other node consistency
8448     self.lu.LogStep(2, steps_total, "Check peer consistency")
8449     self._CheckDisksConsistency(self.instance.primary_node, True, True)
8450
8451     # Step: create new storage
8452     self.lu.LogStep(3, steps_total, "Allocate new storage")
8453     for idx, dev in enumerate(self.instance.disks):
8454       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8455                       (self.new_node, idx))
8456       # we pass force_create=True to force LVM creation
8457       for new_lv in dev.children:
8458         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8459                         _GetInstanceInfoText(self.instance), False)
8460
8461     # Step 4: dbrd minors and drbd setups changes
8462     # after this, we must manually remove the drbd minors on both the
8463     # error and the success paths
8464     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8465     minors = self.cfg.AllocateDRBDMinor([self.new_node
8466                                          for dev in self.instance.disks],
8467                                         self.instance.name)
8468     logging.debug("Allocated minors %r", minors)
8469
8470     iv_names = {}
8471     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8472       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8473                       (self.new_node, idx))
8474       # create new devices on new_node; note that we create two IDs:
8475       # one without port, so the drbd will be activated without
8476       # networking information on the new node at this stage, and one
8477       # with network, for the latter activation in step 4
8478       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8479       if self.instance.primary_node == o_node1:
8480         p_minor = o_minor1
8481       else:
8482         assert self.instance.primary_node == o_node2, "Three-node instance?"
8483         p_minor = o_minor2
8484
8485       new_alone_id = (self.instance.primary_node, self.new_node, None,
8486                       p_minor, new_minor, o_secret)
8487       new_net_id = (self.instance.primary_node, self.new_node, o_port,
8488                     p_minor, new_minor, o_secret)
8489
8490       iv_names[idx] = (dev, dev.children, new_net_id)
8491       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8492                     new_net_id)
8493       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8494                               logical_id=new_alone_id,
8495                               children=dev.children,
8496                               size=dev.size)
8497       try:
8498         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8499                               _GetInstanceInfoText(self.instance), False)
8500       except errors.GenericError:
8501         self.cfg.ReleaseDRBDMinors(self.instance.name)
8502         raise
8503
8504     # We have new devices, shutdown the drbd on the old secondary
8505     for idx, dev in enumerate(self.instance.disks):
8506       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8507       self.cfg.SetDiskID(dev, self.target_node)
8508       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8509       if msg:
8510         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8511                            "node: %s" % (idx, msg),
8512                            hint=("Please cleanup this device manually as"
8513                                  " soon as possible"))
8514
8515     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8516     result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8517                                                self.node_secondary_ip,
8518                                                self.instance.disks)\
8519                                               [self.instance.primary_node]
8520
8521     msg = result.fail_msg
8522     if msg:
8523       # detaches didn't succeed (unlikely)
8524       self.cfg.ReleaseDRBDMinors(self.instance.name)
8525       raise errors.OpExecError("Can't detach the disks from the network on"
8526                                " old node: %s" % (msg,))
8527
8528     # if we managed to detach at least one, we update all the disks of
8529     # the instance to point to the new secondary
8530     self.lu.LogInfo("Updating instance configuration")
8531     for dev, _, new_logical_id in iv_names.itervalues():
8532       dev.logical_id = new_logical_id
8533       self.cfg.SetDiskID(dev, self.instance.primary_node)
8534
8535     self.cfg.Update(self.instance, feedback_fn)
8536
8537     # and now perform the drbd attach
8538     self.lu.LogInfo("Attaching primary drbds to new secondary"
8539                     " (standalone => connected)")
8540     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8541                                             self.new_node],
8542                                            self.node_secondary_ip,
8543                                            self.instance.disks,
8544                                            self.instance.name,
8545                                            False)
8546     for to_node, to_result in result.items():
8547       msg = to_result.fail_msg
8548       if msg:
8549         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8550                            to_node, msg,
8551                            hint=("please do a gnt-instance info to see the"
8552                                  " status of disks"))
8553     cstep = 5
8554     if self.early_release:
8555       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8556       cstep += 1
8557       self._RemoveOldStorage(self.target_node, iv_names)
8558       # WARNING: we release all node locks here, do not do other RPCs
8559       # than WaitForSync to the primary node
8560       self._ReleaseNodeLock([self.instance.primary_node,
8561                              self.target_node,
8562                              self.new_node])
8563
8564     # Wait for sync
8565     # This can fail as the old devices are degraded and _WaitForSync
8566     # does a combined result over all disks, so we don't check its return value
8567     self.lu.LogStep(cstep, steps_total, "Sync devices")
8568     cstep += 1
8569     _WaitForSync(self.lu, self.instance)
8570
8571     # Check all devices manually
8572     self._CheckDevices(self.instance.primary_node, iv_names)
8573
8574     # Step: remove old storage
8575     if not self.early_release:
8576       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8577       self._RemoveOldStorage(self.target_node, iv_names)
8578
8579
8580 class LURepairNodeStorage(NoHooksLU):
8581   """Repairs the volume group on a node.
8582
8583   """
8584   _OP_PARAMS = [
8585     _PNodeName,
8586     ("storage_type", ht.NoDefault, _CheckStorageType),
8587     ("name", ht.NoDefault, ht.TNonEmptyString),
8588     ("ignore_consistency", False, ht.TBool),
8589     ]
8590   REQ_BGL = False
8591
8592   def CheckArguments(self):
8593     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8594
8595     storage_type = self.op.storage_type
8596
8597     if (constants.SO_FIX_CONSISTENCY not in
8598         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8599       raise errors.OpPrereqError("Storage units of type '%s' can not be"
8600                                  " repaired" % storage_type,
8601                                  errors.ECODE_INVAL)
8602
8603   def ExpandNames(self):
8604     self.needed_locks = {
8605       locking.LEVEL_NODE: [self.op.node_name],
8606       }
8607
8608   def _CheckFaultyDisks(self, instance, node_name):
8609     """Ensure faulty disks abort the opcode or at least warn."""
8610     try:
8611       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8612                                   node_name, True):
8613         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8614                                    " node '%s'" % (instance.name, node_name),
8615                                    errors.ECODE_STATE)
8616     except errors.OpPrereqError, err:
8617       if self.op.ignore_consistency:
8618         self.proc.LogWarning(str(err.args[0]))
8619       else:
8620         raise
8621
8622   def CheckPrereq(self):
8623     """Check prerequisites.
8624
8625     """
8626     # Check whether any instance on this node has faulty disks
8627     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8628       if not inst.admin_up:
8629         continue
8630       check_nodes = set(inst.all_nodes)
8631       check_nodes.discard(self.op.node_name)
8632       for inst_node_name in check_nodes:
8633         self._CheckFaultyDisks(inst, inst_node_name)
8634
8635   def Exec(self, feedback_fn):
8636     feedback_fn("Repairing storage unit '%s' on %s ..." %
8637                 (self.op.name, self.op.node_name))
8638
8639     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8640     result = self.rpc.call_storage_execute(self.op.node_name,
8641                                            self.op.storage_type, st_args,
8642                                            self.op.name,
8643                                            constants.SO_FIX_CONSISTENCY)
8644     result.Raise("Failed to repair storage unit '%s' on %s" %
8645                  (self.op.name, self.op.node_name))
8646
8647
class LUNodeEvacuationStrategy(NoHooksLU):
  """Computes where the secondary instances of some nodes should move.

  """
  _OP_PARAMS = [
    ("nodes", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
    ("remote_node", None, ht.TMaybeString),
    ("iallocator", None, ht.TMaybeString),
    ]
  REQ_BGL = False

  def CheckArguments(self):
    """Validate the iallocator/remote node combination.

    """
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    """Expand node names and compute the node locks.

    Without a remote node all nodes are locked; otherwise only the
    evacuated nodes and the remote node.

    """
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
    wanted_locks = {}
    self.needed_locks = wanted_locks
    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      wanted_locks[locking.LEVEL_NODE] = \
        self.op.nodes + [self.op.remote_node]
    else:
      wanted_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Compute the evacuation strategy.

    @return: with a remote node, a list of C{[instance name, remote node]}
        pairs; otherwise the result of the iallocator run

    """
    if self.op.remote_node is None:
      # no fixed target: delegate the decision to the iallocator
      ial = IAllocator(self.cfg, self.rpc,
                       mode=constants.IALLOCATOR_MODE_MEVAC,
                       evac_nodes=self.op.nodes)
      ial.Run(self.op.iallocator, validate=True)
      if not ial.success:
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
                                 errors.ECODE_NORES)
      return ial.result

    # collect all secondary instances first, then validate them
    sec_instances = []
    for node_name in self.op.nodes:
      sec_instances.extend(_GetNodeSecondaryInstances(self.cfg, node_name))

    moves = []
    for inst in sec_instances:
      if inst.primary_node == self.op.remote_node:
        raise errors.OpPrereqError("Node %s is the primary node of"
                                   " instance %s, cannot use it as"
                                   " secondary" %
                                   (self.op.remote_node, inst.name),
                                   errors.ECODE_INVAL)
      moves.append([inst.name, self.op.remote_node])
    return moves
8695
8696
class LUGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  _OP_PARAMS = [
    _PInstanceName,
    ("disk", ht.NoDefault, ht.TInt),
    ("amount", ht.NoDefault, ht.TInt),
    ("wait_for_sync", True, ht.TBool),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are computed later.

    """
    self._ExpandAndLockInstance()
    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Fill in the node locks once the instance lock is held.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The hooks node list is the master node plus all nodes of the instance.

    """
    env = dict(DISK=self.op.disk, AMOUNT=self.op.amount)
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    hook_nodes = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return env, hook_nodes, hook_nodes

  def CheckPrereq(self):
    """Check prerequisites.

    Verifies that all nodes of the instance are online, that the disk
    template is growable, that the disk exists and (except for file-based
    instances) that the nodes have enough free disk space.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    inst_nodes = list(inst.all_nodes)
    for node_name in inst_nodes:
      _CheckNodeOnline(self, node_name)

    self.instance = inst

    if inst.disk_template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing.", errors.ECODE_INVAL)

    self.disk = inst.FindDisk(self.op.disk)

    # TODO: check the free disk space for file, when that feature will be
    # supported
    if inst.disk_template != constants.DT_FILE:
      _CheckNodesFreeDisk(self, inst_nodes, self.op.amount)

  def Exec(self, feedback_fn):
    """Execute disk grow.

    Grows the block device on every node of the instance, records the new
    size in the configuration and optionally waits for the resync.

    """
    instance = self.instance
    disk = self.disk

    activated, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
    if not activated:
      raise errors.OpExecError("Cannot activate block device to grow")

    for node_name in instance.all_nodes:
      self.cfg.SetDiskID(disk, node_name)
      grow_result = self.rpc.call_blockdev_grow(node_name, disk,
                                                self.op.amount)
      grow_result.Raise("Grow request failed to node %s" % node_name)

      # TODO: Rewrite code to work properly
      # After a "resize" DRBD briefly enters sync mode, and DRBD 8.x before
      # 8.0.13 fails if "resize" is called again while syncing; the sleep
      # is a work-around for that bug.
      time.sleep(5)

    disk.RecordGrow(self.op.amount)
    self.cfg.Update(instance, feedback_fn)

    if not self.op.wait_for_sync:
      if not instance.admin_up:
        self.proc.LogWarning("Not shutting down the disk even if the instance"
                             " is not supposed to be running because no wait"
                             " for sync mode was requested.")
      return

    synced = _WaitForSync(self, instance, disks=[disk])
    if not synced:
      self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
                           " status.\nPlease check the instance.")
    if not instance.admin_up:
      # the disk was only activated for the grow operation
      _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8796
8797
class LUQueryInstanceData(NoHooksLU):
  """Query configuration and (optionally) runtime data of instances.

  """
  _OP_PARAMS = [
    ("instances", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("static", False, ht.TBool),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    """Compute the (shared) locks for the requested instances.

    An empty instance list means all instances.

    """
    self.needed_locks = {}
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

    if not self.op.instances:
      self.wanted_names = None
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
    else:
      self.wanted_names = [_ExpandInstanceName(self.cfg, short_name)
                           for short_name in self.op.instances]
      self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Fill in the node locks once the instance locks are held.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    Resolves the wanted instance names (all locked instances if none were
    given) into instance objects.

    """
    if self.wanted_names is None:
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    instances = []
    for inst_name in self.wanted_names:
      instances.append(self.cfg.GetInstanceInfo(inst_name))
    self.wanted_instances = instances

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    @return: None for static queries, missing or offline nodes and devices
        without status; otherwise a tuple of (dev_path, major, minor,
        sync_percent, estimated_time, is_degraded, ldisk_status)

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    find_result = self.rpc.call_blockdev_find(node, dev)
    if find_result.offline:
      return None

    find_result.Raise("Can't compute disk status for %s" % instance_name)

    bdev = find_result.payload
    if bdev is None:
      return None

    return (bdev.dev_path, bdev.major, bdev.minor,
            bdev.sync_percent, bdev.estimated_time,
            bdev.is_degraded, bdev.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    Recurses into the children of the device.

    @return: a dict describing the device, its status on the primary and
        secondary node and its children

    """
    if dev.dev_type in constants.LDS_DRBD:
      # for DRBD the secondary is whichever end is not the primary node;
      # this overrides the snode passed in
      first_node = dev.logical_id[0]
      if first_node == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = first_node

    primary_status = self._ComputeBlockdevStatus(instance.primary_node,
                                                 instance.name, dev)
    secondary_status = self._ComputeBlockdevStatus(snode, instance.name, dev)

    children_status = []
    if dev.children:
      for child in dev.children:
        children_status.append(self._ComputeDiskStatus(instance, snode,
                                                       child))

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": primary_status,
      "sstatus": secondary_status,
      "children": children_status,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data

    @return: a dict mapping each instance name to a dict with its data

    """
    cluster = self.cfg.GetClusterInfo()
    all_data = {}

    for inst in self.wanted_instances:
      if self.op.static:
        run_state = None
      else:
        info = self.rpc.call_instance_info(inst.primary_node,
                                           inst.name,
                                           inst.hypervisor)
        info.Raise("Error checking node %s" % inst.primary_node)
        info = info.payload
        if info and "state" in info:
          run_state = "up"
        else:
          run_state = "down"

      if inst.admin_up:
        cfg_state = "up"
      else:
        cfg_state = "down"

      disk_data = [self._ComputeDiskStatus(inst, None, device)
                   for device in inst.disks]

      all_data[inst.name] = {
        "name": inst.name,
        "config_state": cfg_state,
        "run_state": run_state,
        "pnode": inst.primary_node,
        "snodes": inst.secondary_nodes,
        "os": inst.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, inst.nics),
        "disk_template": inst.disk_template,
        "disks": disk_data,
        "hypervisor": inst.hypervisor,
        "network_port": inst.network_port,
        "hv_instance": inst.hvparams,
        "hv_actual": cluster.FillHV(inst, skip_globals=True),
        "be_instance": inst.beparams,
        "be_actual": cluster.FillBE(inst),
        "os_instance": inst.osparams,
        "os_actual": cluster.SimpleFillOS(inst.os, inst.osparams),
        "serial_no": inst.serial_no,
        "mtime": inst.mtime,
        "ctime": inst.ctime,
        "uuid": inst.uuid,
        }

    return all_data
8954
8955
8956 class LUSetInstanceParams(LogicalUnit):
  """Modifies an instance's parameters.
8958
8959   """
8960   HPATH = "instance-modify"
8961   HTYPE = constants.HTYPE_INSTANCE
8962   _OP_PARAMS = [
8963     _PInstanceName,
8964     ("nics", ht.EmptyList, ht.TList),
8965     ("disks", ht.EmptyList, ht.TList),
8966     ("beparams", ht.EmptyDict, ht.TDict),
8967     ("hvparams", ht.EmptyDict, ht.TDict),
8968     ("disk_template", None, ht.TMaybeString),
8969     ("remote_node", None, ht.TMaybeString),
8970     ("os_name", None, ht.TMaybeString),
8971     ("force_variant", False, ht.TBool),
8972     ("osparams", None, ht.TOr(ht.TDict, ht.TNone)),
8973     _PForce,
8974     ]
8975   REQ_BGL = False
8976
8977   def CheckArguments(self):
8978     if not (self.op.nics or self.op.disks or self.op.disk_template or
8979             self.op.hvparams or self.op.beparams or self.op.os_name):
8980       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8981
8982     if self.op.hvparams:
8983       _CheckGlobalHvParams(self.op.hvparams)
8984
8985     # Disk validation
8986     disk_addremove = 0
8987     for disk_op, disk_dict in self.op.disks:
8988       utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8989       if disk_op == constants.DDM_REMOVE:
8990         disk_addremove += 1
8991         continue
8992       elif disk_op == constants.DDM_ADD:
8993         disk_addremove += 1
8994       else:
8995         if not isinstance(disk_op, int):
8996           raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8997         if not isinstance(disk_dict, dict):
8998           msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8999           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9000
9001       if disk_op == constants.DDM_ADD:
9002         mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
9003         if mode not in constants.DISK_ACCESS_SET:
9004           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9005                                      errors.ECODE_INVAL)
9006         size = disk_dict.get('size', None)
9007         if size is None:
9008           raise errors.OpPrereqError("Required disk parameter size missing",
9009                                      errors.ECODE_INVAL)
9010         try:
9011           size = int(size)
9012         except (TypeError, ValueError), err:
9013           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
9014                                      str(err), errors.ECODE_INVAL)
9015         disk_dict['size'] = size
9016       else:
9017         # modification of disk
9018         if 'size' in disk_dict:
9019           raise errors.OpPrereqError("Disk size change not possible, use"
9020                                      " grow-disk", errors.ECODE_INVAL)
9021
9022     if disk_addremove > 1:
9023       raise errors.OpPrereqError("Only one disk add or remove operation"
9024                                  " supported at a time", errors.ECODE_INVAL)
9025
9026     if self.op.disks and self.op.disk_template is not None:
9027       raise errors.OpPrereqError("Disk template conversion and other disk"
9028                                  " changes not supported at the same time",
9029                                  errors.ECODE_INVAL)
9030
9031     if self.op.disk_template:
9032       _CheckDiskTemplate(self.op.disk_template)
9033       if (self.op.disk_template in constants.DTS_NET_MIRROR and
9034           self.op.remote_node is None):
9035         raise errors.OpPrereqError("Changing the disk template to a mirrored"
9036                                    " one requires specifying a secondary node",
9037                                    errors.ECODE_INVAL)
9038
9039     # NIC validation
9040     nic_addremove = 0
9041     for nic_op, nic_dict in self.op.nics:
9042       utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9043       if nic_op == constants.DDM_REMOVE:
9044         nic_addremove += 1
9045         continue
9046       elif nic_op == constants.DDM_ADD:
9047         nic_addremove += 1
9048       else:
9049         if not isinstance(nic_op, int):
9050           raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9051         if not isinstance(nic_dict, dict):
9052           msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9053           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9054
9055       # nic_dict should be a dict
9056       nic_ip = nic_dict.get('ip', None)
9057       if nic_ip is not None:
9058         if nic_ip.lower() == constants.VALUE_NONE:
9059           nic_dict['ip'] = None
9060         else:
9061           if not netutils.IPAddress.IsValid(nic_ip):
9062             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9063                                        errors.ECODE_INVAL)
9064
9065       nic_bridge = nic_dict.get('bridge', None)
9066       nic_link = nic_dict.get('link', None)
9067       if nic_bridge and nic_link:
9068         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9069                                    " at the same time", errors.ECODE_INVAL)
9070       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9071         nic_dict['bridge'] = None
9072       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9073         nic_dict['link'] = None
9074
9075       if nic_op == constants.DDM_ADD:
9076         nic_mac = nic_dict.get('mac', None)
9077         if nic_mac is None:
9078           nic_dict['mac'] = constants.VALUE_AUTO
9079
9080       if 'mac' in nic_dict:
9081         nic_mac = nic_dict['mac']
9082         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9083           nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9084
9085         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9086           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9087                                      " modifying an existing nic",
9088                                      errors.ECODE_INVAL)
9089
9090     if nic_addremove > 1:
9091       raise errors.OpPrereqError("Only one NIC add or remove operation"
9092                                  " supported at a time", errors.ECODE_INVAL)
9093
9094   def ExpandNames(self):
9095     self._ExpandAndLockInstance()
9096     self.needed_locks[locking.LEVEL_NODE] = []
9097     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9098
9099   def DeclareLocks(self, level):
9100     if level == locking.LEVEL_NODE:
9101       self._LockInstancesNodes()
9102       if self.op.disk_template and self.op.remote_node:
9103         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9104         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9105
9106   def BuildHooksEnv(self):
9107     """Build hooks env.
9108
9109     This runs on the master, primary and secondaries.
9110
9111     """
9112     args = dict()
9113     if constants.BE_MEMORY in self.be_new:
9114       args['memory'] = self.be_new[constants.BE_MEMORY]
9115     if constants.BE_VCPUS in self.be_new:
9116       args['vcpus'] = self.be_new[constants.BE_VCPUS]
9117     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9118     # information at all.
9119     if self.op.nics:
9120       args['nics'] = []
9121       nic_override = dict(self.op.nics)
9122       for idx, nic in enumerate(self.instance.nics):
9123         if idx in nic_override:
9124           this_nic_override = nic_override[idx]
9125         else:
9126           this_nic_override = {}
9127         if 'ip' in this_nic_override:
9128           ip = this_nic_override['ip']
9129         else:
9130           ip = nic.ip
9131         if 'mac' in this_nic_override:
9132           mac = this_nic_override['mac']
9133         else:
9134           mac = nic.mac
9135         if idx in self.nic_pnew:
9136           nicparams = self.nic_pnew[idx]
9137         else:
9138           nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9139         mode = nicparams[constants.NIC_MODE]
9140         link = nicparams[constants.NIC_LINK]
9141         args['nics'].append((ip, mac, mode, link))
9142       if constants.DDM_ADD in nic_override:
9143         ip = nic_override[constants.DDM_ADD].get('ip', None)
9144         mac = nic_override[constants.DDM_ADD]['mac']
9145         nicparams = self.nic_pnew[constants.DDM_ADD]
9146         mode = nicparams[constants.NIC_MODE]
9147         link = nicparams[constants.NIC_LINK]
9148         args['nics'].append((ip, mac, mode, link))
9149       elif constants.DDM_REMOVE in nic_override:
9150         del args['nics'][-1]
9151
9152     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9153     if self.op.disk_template:
9154       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9155     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9156     return env, nl, nl
9157
9158   def CheckPrereq(self):
9159     """Check prerequisites.
9160
9161     This only checks the instance list against the existing names.
9162
9163     """
9164     # checking the new params on the primary/secondary nodes
9165
9166     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9167     cluster = self.cluster = self.cfg.GetClusterInfo()
9168     assert self.instance is not None, \
9169       "Cannot retrieve locked instance %s" % self.op.instance_name
9170     pnode = instance.primary_node
9171     nodelist = list(instance.all_nodes)
9172
9173     # OS change
9174     if self.op.os_name and not self.op.force:
9175       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9176                       self.op.force_variant)
9177       instance_os = self.op.os_name
9178     else:
9179       instance_os = instance.os
9180
9181     if self.op.disk_template:
9182       if instance.disk_template == self.op.disk_template:
9183         raise errors.OpPrereqError("Instance already has disk template %s" %
9184                                    instance.disk_template, errors.ECODE_INVAL)
9185
9186       if (instance.disk_template,
9187           self.op.disk_template) not in self._DISK_CONVERSIONS:
9188         raise errors.OpPrereqError("Unsupported disk template conversion from"
9189                                    " %s to %s" % (instance.disk_template,
9190                                                   self.op.disk_template),
9191                                    errors.ECODE_INVAL)
9192       _CheckInstanceDown(self, instance, "cannot change disk template")
9193       if self.op.disk_template in constants.DTS_NET_MIRROR:
9194         if self.op.remote_node == pnode:
9195           raise errors.OpPrereqError("Given new secondary node %s is the same"
9196                                      " as the primary node of the instance" %
9197                                      self.op.remote_node, errors.ECODE_STATE)
9198         _CheckNodeOnline(self, self.op.remote_node)
9199         _CheckNodeNotDrained(self, self.op.remote_node)
9200         disks = [{"size": d.size} for d in instance.disks]
9201         required = _ComputeDiskSize(self.op.disk_template, disks)
9202         _CheckNodesFreeDisk(self, [self.op.remote_node], required)
9203
9204     # hvparams processing
9205     if self.op.hvparams:
9206       hv_type = instance.hypervisor
9207       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9208       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9209       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9210
9211       # local check
9212       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9213       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9214       self.hv_new = hv_new # the new actual values
9215       self.hv_inst = i_hvdict # the new dict (without defaults)
9216     else:
9217       self.hv_new = self.hv_inst = {}
9218
9219     # beparams processing
9220     if self.op.beparams:
9221       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9222                                    use_none=True)
9223       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9224       be_new = cluster.SimpleFillBE(i_bedict)
9225       self.be_new = be_new # the new actual values
9226       self.be_inst = i_bedict # the new dict (without defaults)
9227     else:
9228       self.be_new = self.be_inst = {}
9229
9230     # osparams processing
9231     if self.op.osparams:
9232       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9233       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9234       self.os_inst = i_osdict # the new dict (without defaults)
9235     else:
9236       self.os_inst = {}
9237
9238     self.warn = []
9239
9240     if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9241       mem_check_list = [pnode]
9242       if be_new[constants.BE_AUTO_BALANCE]:
9243         # either we changed auto_balance to yes or it was from before
9244         mem_check_list.extend(instance.secondary_nodes)
9245       instance_info = self.rpc.call_instance_info(pnode, instance.name,
9246                                                   instance.hypervisor)
9247       nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
9248                                          instance.hypervisor)
9249       pninfo = nodeinfo[pnode]
9250       msg = pninfo.fail_msg
9251       if msg:
9252         # Assume the primary node is unreachable and go ahead
9253         self.warn.append("Can't get info from primary node %s: %s" %
9254                          (pnode,  msg))
9255       elif not isinstance(pninfo.payload.get('memory_free', None), int):
9256         self.warn.append("Node data from primary node %s doesn't contain"
9257                          " free memory information" % pnode)
9258       elif instance_info.fail_msg:
9259         self.warn.append("Can't get instance runtime information: %s" %
9260                         instance_info.fail_msg)
9261       else:
9262         if instance_info.payload:
9263           current_mem = int(instance_info.payload['memory'])
9264         else:
9265           # Assume instance not running
9266           # (there is a slight race condition here, but it's not very probable,
9267           # and we have no other way to check)
9268           current_mem = 0
9269         miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9270                     pninfo.payload['memory_free'])
9271         if miss_mem > 0:
9272           raise errors.OpPrereqError("This change will prevent the instance"
9273                                      " from starting, due to %d MB of memory"
9274                                      " missing on its primary node" % miss_mem,
9275                                      errors.ECODE_NORES)
9276
9277       if be_new[constants.BE_AUTO_BALANCE]:
9278         for node, nres in nodeinfo.items():
9279           if node not in instance.secondary_nodes:
9280             continue
9281           msg = nres.fail_msg
9282           if msg:
9283             self.warn.append("Can't get info from secondary node %s: %s" %
9284                              (node, msg))
9285           elif not isinstance(nres.payload.get('memory_free', None), int):
9286             self.warn.append("Secondary node %s didn't return free"
9287                              " memory information" % node)
9288           elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9289             self.warn.append("Not enough memory to failover instance to"
9290                              " secondary node %s" % node)
9291
9292     # NIC processing
9293     self.nic_pnew = {}
9294     self.nic_pinst = {}
9295     for nic_op, nic_dict in self.op.nics:
9296       if nic_op == constants.DDM_REMOVE:
9297         if not instance.nics:
9298           raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9299                                      errors.ECODE_INVAL)
9300         continue
9301       if nic_op != constants.DDM_ADD:
9302         # an existing nic
9303         if not instance.nics:
9304           raise errors.OpPrereqError("Invalid NIC index %s, instance has"
9305                                      " no NICs" % nic_op,
9306                                      errors.ECODE_INVAL)
9307         if nic_op < 0 or nic_op >= len(instance.nics):
9308           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
9309                                      " are 0 to %d" %
9310                                      (nic_op, len(instance.nics) - 1),
9311                                      errors.ECODE_INVAL)
9312         old_nic_params = instance.nics[nic_op].nicparams
9313         old_nic_ip = instance.nics[nic_op].ip
9314       else:
9315         old_nic_params = {}
9316         old_nic_ip = None
9317
9318       update_params_dict = dict([(key, nic_dict[key])
9319                                  for key in constants.NICS_PARAMETERS
9320                                  if key in nic_dict])
9321
9322       if 'bridge' in nic_dict:
9323         update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
9324
9325       new_nic_params = _GetUpdatedParams(old_nic_params,
9326                                          update_params_dict)
9327       utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
9328       new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
9329       objects.NIC.CheckParameterSyntax(new_filled_nic_params)
9330       self.nic_pinst[nic_op] = new_nic_params
9331       self.nic_pnew[nic_op] = new_filled_nic_params
9332       new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
9333
9334       if new_nic_mode == constants.NIC_MODE_BRIDGED:
9335         nic_bridge = new_filled_nic_params[constants.NIC_LINK]
9336         msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
9337         if msg:
9338           msg = "Error checking bridges on node %s: %s" % (pnode, msg)
9339           if self.op.force:
9340             self.warn.append(msg)
9341           else:
9342             raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
9343       if new_nic_mode == constants.NIC_MODE_ROUTED:
9344         if 'ip' in nic_dict:
9345           nic_ip = nic_dict['ip']
9346         else:
9347           nic_ip = old_nic_ip
9348         if nic_ip is None:
9349           raise errors.OpPrereqError('Cannot set the nic ip to None'
9350                                      ' on a routed nic', errors.ECODE_INVAL)
9351       if 'mac' in nic_dict:
9352         nic_mac = nic_dict['mac']
9353         if nic_mac is None:
9354           raise errors.OpPrereqError('Cannot set the nic mac to None',
9355                                      errors.ECODE_INVAL)
9356         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9357           # otherwise generate the mac
9358           nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
9359         else:
9360           # or validate/reserve the current one
9361           try:
9362             self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
9363           except errors.ReservationError:
9364             raise errors.OpPrereqError("MAC address %s already in use"
9365                                        " in cluster" % nic_mac,
9366                                        errors.ECODE_NOTUNIQUE)
9367
9368     # DISK processing
9369     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
9370       raise errors.OpPrereqError("Disk operations not supported for"
9371                                  " diskless instances",
9372                                  errors.ECODE_INVAL)
9373     for disk_op, _ in self.op.disks:
9374       if disk_op == constants.DDM_REMOVE:
9375         if len(instance.disks) == 1:
9376           raise errors.OpPrereqError("Cannot remove the last disk of"
9377                                      " an instance", errors.ECODE_INVAL)
9378         _CheckInstanceDown(self, instance, "cannot remove disks")
9379
9380       if (disk_op == constants.DDM_ADD and
9381           len(instance.nics) >= constants.MAX_DISKS):
9382         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
9383                                    " add more" % constants.MAX_DISKS,
9384                                    errors.ECODE_STATE)
9385       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
9386         # an existing disk
9387         if disk_op < 0 or disk_op >= len(instance.disks):
9388           raise errors.OpPrereqError("Invalid disk index %s, valid values"
9389                                      " are 0 to %d" %
9390                                      (disk_op, len(instance.disks)),
9391                                      errors.ECODE_INVAL)
9392
9393     return
9394
9395   def _ConvertPlainToDrbd(self, feedback_fn):
9396     """Converts an instance from plain to drbd.
9397
9398     """
9399     feedback_fn("Converting template to drbd")
9400     instance = self.instance
9401     pnode = instance.primary_node
9402     snode = self.op.remote_node
9403
9404     # create a fake disk info for _GenerateDiskTemplate
9405     disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9406     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9407                                       instance.name, pnode, [snode],
9408                                       disk_info, None, None, 0)
9409     info = _GetInstanceInfoText(instance)
9410     feedback_fn("Creating aditional volumes...")
9411     # first, create the missing data and meta devices
9412     for disk in new_disks:
9413       # unfortunately this is... not too nice
9414       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9415                             info, True)
9416       for child in disk.children:
9417         _CreateSingleBlockDev(self, snode, instance, child, info, True)
9418     # at this stage, all new LVs have been created, we can rename the
9419     # old ones
9420     feedback_fn("Renaming original volumes...")
9421     rename_list = [(o, n.children[0].logical_id)
9422                    for (o, n) in zip(instance.disks, new_disks)]
9423     result = self.rpc.call_blockdev_rename(pnode, rename_list)
9424     result.Raise("Failed to rename original LVs")
9425
9426     feedback_fn("Initializing DRBD devices...")
9427     # all child devices are in place, we can now create the DRBD devices
9428     for disk in new_disks:
9429       for node in [pnode, snode]:
9430         f_create = node == pnode
9431         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9432
9433     # at this point, the instance has been modified
9434     instance.disk_template = constants.DT_DRBD8
9435     instance.disks = new_disks
9436     self.cfg.Update(instance, feedback_fn)
9437
9438     # disks are created, waiting for sync
9439     disk_abort = not _WaitForSync(self, instance)
9440     if disk_abort:
9441       raise errors.OpExecError("There are some degraded disks for"
9442                                " this instance, please cleanup manually")
9443
9444   def _ConvertDrbdToPlain(self, feedback_fn):
9445     """Converts an instance from drbd to plain.
9446
9447     """
9448     instance = self.instance
9449     assert len(instance.secondary_nodes) == 1
9450     pnode = instance.primary_node
9451     snode = instance.secondary_nodes[0]
9452     feedback_fn("Converting template to plain")
9453
9454     old_disks = instance.disks
9455     new_disks = [d.children[0] for d in old_disks]
9456
9457     # copy over size and mode
9458     for parent, child in zip(old_disks, new_disks):
9459       child.size = parent.size
9460       child.mode = parent.mode
9461
9462     # update instance structure
9463     instance.disks = new_disks
9464     instance.disk_template = constants.DT_PLAIN
9465     self.cfg.Update(instance, feedback_fn)
9466
9467     feedback_fn("Removing volumes on the secondary node...")
9468     for disk in old_disks:
9469       self.cfg.SetDiskID(disk, snode)
9470       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9471       if msg:
9472         self.LogWarning("Could not remove block device %s on node %s,"
9473                         " continuing anyway: %s", disk.iv_name, snode, msg)
9474
9475     feedback_fn("Removing unneeded volumes on the primary node...")
9476     for idx, disk in enumerate(old_disks):
9477       meta = disk.children[1]
9478       self.cfg.SetDiskID(meta, pnode)
9479       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9480       if msg:
9481         self.LogWarning("Could not remove metadata for disk %d on node %s,"
9482                         " continuing anyway: %s", idx, pnode, msg)
9483
9484
9485   def Exec(self, feedback_fn):
9486     """Modifies an instance.
9487
9488     All parameters take effect only at the next restart of the instance.
9489
9490     """
9491     # Process here the warnings from CheckPrereq, as we don't have a
9492     # feedback_fn there.
9493     for warn in self.warn:
9494       feedback_fn("WARNING: %s" % warn)
9495
9496     result = []
9497     instance = self.instance
9498     # disk changes
9499     for disk_op, disk_dict in self.op.disks:
9500       if disk_op == constants.DDM_REMOVE:
9501         # remove the last disk
9502         device = instance.disks.pop()
9503         device_idx = len(instance.disks)
9504         for node, disk in device.ComputeNodeTree(instance.primary_node):
9505           self.cfg.SetDiskID(disk, node)
9506           msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9507           if msg:
9508             self.LogWarning("Could not remove disk/%d on node %s: %s,"
9509                             " continuing anyway", device_idx, node, msg)
9510         result.append(("disk/%d" % device_idx, "remove"))
9511       elif disk_op == constants.DDM_ADD:
9512         # add a new disk
9513         if instance.disk_template == constants.DT_FILE:
9514           file_driver, file_path = instance.disks[0].logical_id
9515           file_path = os.path.dirname(file_path)
9516         else:
9517           file_driver = file_path = None
9518         disk_idx_base = len(instance.disks)
9519         new_disk = _GenerateDiskTemplate(self,
9520                                          instance.disk_template,
9521                                          instance.name, instance.primary_node,
9522                                          instance.secondary_nodes,
9523                                          [disk_dict],
9524                                          file_path,
9525                                          file_driver,
9526                                          disk_idx_base)[0]
9527         instance.disks.append(new_disk)
9528         info = _GetInstanceInfoText(instance)
9529
9530         logging.info("Creating volume %s for instance %s",
9531                      new_disk.iv_name, instance.name)
9532         # Note: this needs to be kept in sync with _CreateDisks
9533         #HARDCODE
9534         for node in instance.all_nodes:
9535           f_create = node == instance.primary_node
9536           try:
9537             _CreateBlockDev(self, node, instance, new_disk,
9538                             f_create, info, f_create)
9539           except errors.OpExecError, err:
9540             self.LogWarning("Failed to create volume %s (%s) on"
9541                             " node %s: %s",
9542                             new_disk.iv_name, new_disk, node, err)
9543         result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9544                        (new_disk.size, new_disk.mode)))
9545       else:
9546         # change a given disk
9547         instance.disks[disk_op].mode = disk_dict['mode']
9548         result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9549
9550     if self.op.disk_template:
9551       r_shut = _ShutdownInstanceDisks(self, instance)
9552       if not r_shut:
9553         raise errors.OpExecError("Cannot shutdow instance disks, unable to"
9554                                  " proceed with disk template conversion")
9555       mode = (instance.disk_template, self.op.disk_template)
9556       try:
9557         self._DISK_CONVERSIONS[mode](self, feedback_fn)
9558       except:
9559         self.cfg.ReleaseDRBDMinors(instance.name)
9560         raise
9561       result.append(("disk_template", self.op.disk_template))
9562
9563     # NIC changes
9564     for nic_op, nic_dict in self.op.nics:
9565       if nic_op == constants.DDM_REMOVE:
9566         # remove the last nic
9567         del instance.nics[-1]
9568         result.append(("nic.%d" % len(instance.nics), "remove"))
9569       elif nic_op == constants.DDM_ADD:
9570         # mac and bridge should be set, by now
9571         mac = nic_dict['mac']
9572         ip = nic_dict.get('ip', None)
9573         nicparams = self.nic_pinst[constants.DDM_ADD]
9574         new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9575         instance.nics.append(new_nic)
9576         result.append(("nic.%d" % (len(instance.nics) - 1),
9577                        "add:mac=%s,ip=%s,mode=%s,link=%s" %
9578                        (new_nic.mac, new_nic.ip,
9579                         self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9580                         self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9581                        )))
9582       else:
9583         for key in 'mac', 'ip':
9584           if key in nic_dict:
9585             setattr(instance.nics[nic_op], key, nic_dict[key])
9586         if nic_op in self.nic_pinst:
9587           instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9588         for key, val in nic_dict.iteritems():
9589           result.append(("nic.%s/%d" % (key, nic_op), val))
9590
9591     # hvparams changes
9592     if self.op.hvparams:
9593       instance.hvparams = self.hv_inst
9594       for key, val in self.op.hvparams.iteritems():
9595         result.append(("hv/%s" % key, val))
9596
9597     # beparams changes
9598     if self.op.beparams:
9599       instance.beparams = self.be_inst
9600       for key, val in self.op.beparams.iteritems():
9601         result.append(("be/%s" % key, val))
9602
9603     # OS change
9604     if self.op.os_name:
9605       instance.os = self.op.os_name
9606
9607     # osparams changes
9608     if self.op.osparams:
9609       instance.osparams = self.os_inst
9610       for key, val in self.op.osparams.iteritems():
9611         result.append(("os/%s" % key, val))
9612
9613     self.cfg.Update(instance, feedback_fn)
9614
9615     return result
9616
9617   _DISK_CONVERSIONS = {
9618     (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9619     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9620     }
9621
9622
class LUQueryExports(NoHooksLU):
  """Query the exports list

  The node locks are acquired in shared mode; if no nodes are given, all
  of them are queried.

  """
  _OP_PARAMS = [
    ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("use_locking", False, ht.TBool),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    """Declares the (shared) node locks needed for the query.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.nodes:
      wanted = _GetWantedNodes(self, self.op.nodes)
    else:
      # an empty node list stands for "all nodes"
      wanted = locking.ALL_SET
    self.needed_locks[locking.LEVEL_NODE] = wanted

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node; nodes whose query failed are mapped to False
        instead

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
    exports = {}
    for (node, nres) in self.rpc.call_export_list(self.nodes).items():
      if nres.fail_msg:
        exports[node] = False
      else:
        exports[node] = nres.payload

    return exports
9661
9662
class LUPrepareExport(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  _OP_PARAMS = [
    _PInstanceName,
    ("mode", ht.NoDefault, ht.TElemOf(constants.EXPORT_MODES)),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    """Expands the instance name and locks the instance.

    """
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    The primary node of the instance has to be online; the cluster domain
    secret is read here for later use in L{Exec}.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    @param feedback_fn: function used to report progress to the caller
    @rtype: dict or None
    @return: for remote exports a dictionary with the keys "handshake",
        "x509_key_name" (a tuple of key name, HMAC of the name and salt)
        and "x509_ca"; None for every other mode

    """
    if self.op.mode != constants.EXPORT_MODE_REMOTE:
      return None

    pnode = self.instance.primary_node
    salt = utils.GenerateSecret(8)

    feedback_fn("Generating X509 certificate on %s" % pnode)
    cert_result = self.rpc.call_x509_cert_create(pnode,
                                                 constants.RIE_CERT_VALIDITY)
    cert_result.Raise("Can't create X509 key and certificate on %s" %
                      cert_result.node)

    (key_name, cert_pem) = cert_result.payload

    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           cert_pem)

    # the key name and the certificate are authenticated with the cluster
    # domain secret, using the same salt for both
    handshake = masterd.instance.ComputeRemoteExportHandshake(self._cds)
    key_name_hmac = utils.Sha1Hmac(self._cds, key_name, salt=salt)
    signed_ca = utils.SignX509Certificate(cert, self._cds, salt)

    return {
      "handshake": handshake,
      "x509_key_name": (key_name, key_name_hmac, salt),
      "x509_ca": signed_ca,
      }
9716
9717
9718 class LUExportInstance(LogicalUnit):
9719   """Export an instance to an image in the cluster.
9720
9721   """
9722   HPATH = "instance-export"
9723   HTYPE = constants.HTYPE_INSTANCE
9724   _OP_PARAMS = [
9725     _PInstanceName,
9726     ("target_node", ht.NoDefault, ht.TOr(ht.TNonEmptyString, ht.TList)),
9727     ("shutdown", True, ht.TBool),
9728     _PShutdownTimeout,
9729     ("remove_instance", False, ht.TBool),
9730     ("ignore_remove_failures", False, ht.TBool),
9731     ("mode", constants.EXPORT_MODE_LOCAL, ht.TElemOf(constants.EXPORT_MODES)),
9732     ("x509_key_name", None, ht.TOr(ht.TList, ht.TNone)),
9733     ("destination_x509_ca", None, ht.TMaybeString),
9734     ]
9735   REQ_BGL = False
9736
9737   def CheckArguments(self):
9738     """Check the arguments.
9739
9740     """
9741     self.x509_key_name = self.op.x509_key_name
9742     self.dest_x509_ca_pem = self.op.destination_x509_ca
9743
9744     if self.op.mode == constants.EXPORT_MODE_REMOTE:
9745       if not self.x509_key_name:
9746         raise errors.OpPrereqError("Missing X509 key name for encryption",
9747                                    errors.ECODE_INVAL)
9748
9749       if not self.dest_x509_ca_pem:
9750         raise errors.OpPrereqError("Missing destination X509 CA",
9751                                    errors.ECODE_INVAL)
9752
9753   def ExpandNames(self):
9754     self._ExpandAndLockInstance()
9755
9756     # Lock all nodes for local exports
9757     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9758       # FIXME: lock only instance primary and destination node
9759       #
9760       # Sad but true, for now we have do lock all nodes, as we don't know where
9761       # the previous export might be, and in this LU we search for it and
9762       # remove it from its current node. In the future we could fix this by:
9763       #  - making a tasklet to search (share-lock all), then create the
9764       #    new one, then one to remove, after
9765       #  - removing the removal operation altogether
9766       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9767
9768   def DeclareLocks(self, level):
9769     """Last minute lock declaration."""
9770     # All nodes are locked anyway, so nothing to do here.
9771
9772   def BuildHooksEnv(self):
9773     """Build hooks env.
9774
9775     This will run on the master, primary node and target node.
9776
9777     """
9778     env = {
9779       "EXPORT_MODE": self.op.mode,
9780       "EXPORT_NODE": self.op.target_node,
9781       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9782       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9783       # TODO: Generic function for boolean env variables
9784       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9785       }
9786
9787     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9788
9789     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9790
9791     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9792       nl.append(self.op.target_node)
9793
9794     return env, nl, nl
9795
9796   def CheckPrereq(self):
9797     """Check prerequisites.
9798
9799     This checks that the instance and node names are valid.
9800
9801     """
9802     instance_name = self.op.instance_name
9803
9804     self.instance = self.cfg.GetInstanceInfo(instance_name)
9805     assert self.instance is not None, \
9806           "Cannot retrieve locked instance %s" % self.op.instance_name
9807     _CheckNodeOnline(self, self.instance.primary_node)
9808
9809     if (self.op.remove_instance and self.instance.admin_up and
9810         not self.op.shutdown):
9811       raise errors.OpPrereqError("Can not remove instance without shutting it"
9812                                  " down before")
9813
9814     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9815       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9816       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9817       assert self.dst_node is not None
9818
9819       _CheckNodeOnline(self, self.dst_node.name)
9820       _CheckNodeNotDrained(self, self.dst_node.name)
9821
9822       self._cds = None
9823       self.dest_disk_info = None
9824       self.dest_x509_ca = None
9825
9826     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9827       self.dst_node = None
9828
9829       if len(self.op.target_node) != len(self.instance.disks):
9830         raise errors.OpPrereqError(("Received destination information for %s"
9831                                     " disks, but instance %s has %s disks") %
9832                                    (len(self.op.target_node), instance_name,
9833                                     len(self.instance.disks)),
9834                                    errors.ECODE_INVAL)
9835
9836       cds = _GetClusterDomainSecret()
9837
9838       # Check X509 key name
9839       try:
9840         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9841       except (TypeError, ValueError), err:
9842         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9843
9844       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9845         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9846                                    errors.ECODE_INVAL)
9847
9848       # Load and verify CA
9849       try:
9850         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9851       except OpenSSL.crypto.Error, err:
9852         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9853                                    (err, ), errors.ECODE_INVAL)
9854
9855       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9856       if errcode is not None:
9857         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9858                                    (msg, ), errors.ECODE_INVAL)
9859
9860       self.dest_x509_ca = cert
9861
9862       # Verify target information
9863       disk_info = []
9864       for idx, disk_data in enumerate(self.op.target_node):
9865         try:
9866           (host, port, magic) = \
9867             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9868         except errors.GenericError, err:
9869           raise errors.OpPrereqError("Target info for disk %s: %s" %
9870                                      (idx, err), errors.ECODE_INVAL)
9871
9872         disk_info.append((host, port, magic))
9873
9874       assert len(disk_info) == len(self.op.target_node)
9875       self.dest_disk_info = disk_info
9876
9877     else:
9878       raise errors.ProgrammerError("Unhandled export mode %r" %
9879                                    self.op.mode)
9880
9881     # instance disk type verification
9882     # TODO: Implement export support for file-based disks
9883     for disk in self.instance.disks:
9884       if disk.dev_type == constants.LD_FILE:
9885         raise errors.OpPrereqError("Export not supported for instances with"
9886                                    " file-based disks", errors.ECODE_INVAL)
9887
9888   def _CleanupExports(self, feedback_fn):
9889     """Removes exports of current instance from all other nodes.
9890
9891     If an instance in a cluster with nodes A..D was exported to node C, its
9892     exports will be removed from the nodes A, B and D.
9893
9894     """
9895     assert self.op.mode != constants.EXPORT_MODE_REMOTE
9896
9897     nodelist = self.cfg.GetNodeList()
9898     nodelist.remove(self.dst_node.name)
9899
9900     # on one-node clusters nodelist will be empty after the removal
9901     # if we proceed the backup would be removed because OpQueryExports
9902     # substitutes an empty list with the full cluster node list.
9903     iname = self.instance.name
9904     if nodelist:
9905       feedback_fn("Removing old exports for instance %s" % iname)
9906       exportlist = self.rpc.call_export_list(nodelist)
9907       for node in exportlist:
9908         if exportlist[node].fail_msg:
9909           continue
9910         if iname in exportlist[node].payload:
9911           msg = self.rpc.call_export_remove(node, iname).fail_msg
9912           if msg:
9913             self.LogWarning("Could not remove older export for instance %s"
9914                             " on node %s: %s", iname, node, msg)
9915
9916   def Exec(self, feedback_fn):
9917     """Export an instance to an image in the cluster.
9918
9919     """
9920     assert self.op.mode in constants.EXPORT_MODES
9921
9922     instance = self.instance
9923     src_node = instance.primary_node
9924
9925     if self.op.shutdown:
9926       # shutdown the instance, but not the disks
9927       feedback_fn("Shutting down instance %s" % instance.name)
9928       result = self.rpc.call_instance_shutdown(src_node, instance,
9929                                                self.op.shutdown_timeout)
9930       # TODO: Maybe ignore failures if ignore_remove_failures is set
9931       result.Raise("Could not shutdown instance %s on"
9932                    " node %s" % (instance.name, src_node))
9933
9934     # set the disks ID correctly since call_instance_start needs the
9935     # correct drbd minor to create the symlinks
9936     for disk in instance.disks:
9937       self.cfg.SetDiskID(disk, src_node)
9938
9939     activate_disks = (not instance.admin_up)
9940
9941     if activate_disks:
9942       # Activate the instance disks if we'exporting a stopped instance
9943       feedback_fn("Activating disks for %s" % instance.name)
9944       _StartInstanceDisks(self, instance, None)
9945
9946     try:
9947       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9948                                                      instance)
9949
9950       helper.CreateSnapshots()
9951       try:
9952         if (self.op.shutdown and instance.admin_up and
9953             not self.op.remove_instance):
9954           assert not activate_disks
9955           feedback_fn("Starting instance %s" % instance.name)
9956           result = self.rpc.call_instance_start(src_node, instance, None, None)
9957           msg = result.fail_msg
9958           if msg:
9959             feedback_fn("Failed to start instance: %s" % msg)
9960             _ShutdownInstanceDisks(self, instance)
9961             raise errors.OpExecError("Could not start instance: %s" % msg)
9962
9963         if self.op.mode == constants.EXPORT_MODE_LOCAL:
9964           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9965         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9966           connect_timeout = constants.RIE_CONNECT_TIMEOUT
9967           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9968
9969           (key_name, _, _) = self.x509_key_name
9970
9971           dest_ca_pem = \
9972             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9973                                             self.dest_x509_ca)
9974
9975           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9976                                                      key_name, dest_ca_pem,
9977                                                      timeouts)
9978       finally:
9979         helper.Cleanup()
9980
9981       # Check for backwards compatibility
9982       assert len(dresults) == len(instance.disks)
9983       assert compat.all(isinstance(i, bool) for i in dresults), \
9984              "Not all results are boolean: %r" % dresults
9985
9986     finally:
9987       if activate_disks:
9988         feedback_fn("Deactivating disks for %s" % instance.name)
9989         _ShutdownInstanceDisks(self, instance)
9990
9991     if not (compat.all(dresults) and fin_resu):
9992       failures = []
9993       if not fin_resu:
9994         failures.append("export finalization")
9995       if not compat.all(dresults):
9996         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
9997                                if not dsk)
9998         failures.append("disk export: disk(s) %s" % fdsk)
9999
10000       raise errors.OpExecError("Export failed, errors in %s" %
10001                                utils.CommaJoin(failures))
10002
10003     # At this point, the export was successful, we can cleanup/finish
10004
10005     # Remove instance if requested
10006     if self.op.remove_instance:
10007       feedback_fn("Removing instance %s" % instance.name)
10008       _RemoveInstance(self, feedback_fn, instance,
10009                       self.op.ignore_remove_failures)
10010
10011     if self.op.mode == constants.EXPORT_MODE_LOCAL:
10012       self._CleanupExports(feedback_fn)
10013
10014     return fin_resu, dresults
10015
10016
class LURemoveExport(NoHooksLU):
  """Remove exports related to the named instance.

  """
  _OP_PARAMS = [
    _PInstanceName,
    ]
  REQ_BGL = False

  def ExpandNames(self):
    """Declares the locks on all nodes.

    """
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks = {}
    self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Remove any export.

    All locked nodes are asked for their exports and the one belonging to
    the instance is removed wherever it is found. Nodes failing the query
    are skipped with a warning, failed removals are logged.

    @param feedback_fn: function used to tell the caller when nothing was
        found for a name which could not be expanded

    """
    expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    unknown_instance = not expanded_name
    if unknown_instance:
      iname = self.op.instance_name
    else:
      iname = expanded_name

    node_names = self.acquired_locks[locking.LEVEL_NODE]
    found = False
    for (node, nres) in self.rpc.call_export_list(node_names).items():
      query_msg = nres.fail_msg
      if query_msg:
        self.LogWarning("Failed to query node %s (continuing): %s",
                        node, query_msg)
        continue
      if iname not in nres.payload:
        continue
      found = True
      remove_msg = self.rpc.call_export_remove(node, iname).fail_msg
      if remove_msg:
        logging.error("Could not remove export for instance %s"
                      " on node %s: %s", iname, node, remove_msg)

    if unknown_instance and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
10065
10066
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """

  def ExpandNames(self):
    """Expands the object name and declares the lock on the object.

    Only node and instance tags need a lock (and a name expansion); for
    any other kind no lock is declared.

    """
    self.needed_locks = {}
    kind = self.op.kind
    if kind == constants.TAG_NODE:
      full_name = _ExpandNodeName(self.cfg, self.op.name)
      self.op.name = full_name
      self.needed_locks[locking.LEVEL_NODE] = full_name
    elif kind == constants.TAG_INSTANCE:
      full_name = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = full_name
      self.needed_locks[locking.LEVEL_INSTANCE] = full_name

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    Looks up the configuration object the tags belong to and stores it in
    C{self.target}.

    @raise errors.OpPrereqError: if the tag kind is not cluster, node or
        instance

    """
    kind = self.op.kind
    if kind == constants.TAG_CLUSTER:
      target = self.cfg.GetClusterInfo()
    elif kind == constants.TAG_NODE:
      target = self.cfg.GetNodeInfo(self.op.name)
    elif kind == constants.TAG_INSTANCE:
      target = self.cfg.GetInstanceInfo(self.op.name)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(kind), errors.ECODE_INVAL)
    self.target = target
10099
10100
class LUGetTags(TagsLU):
  """Logical unit returning the tags of one object.

  """
  _OP_PARAMS = [
    ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
    # Name is only meaningful for nodes and instances
    ("name", ht.NoDefault, ht.TMaybeString),
    ]
  REQ_BGL = False

  def ExpandNames(self):
    """Request the same locks as the base class, but in shared mode.

    """
    TagsLU.ExpandNames(self)

    # Nothing is modified here, so no lock level has to be held exclusively
    self.share_locks = dict([(level, 1) for level in locking.LEVELS])

  def Exec(self, feedback_fn):
    """Return the tags of the target object as a list.

    """
    tags = self.target.GetTags()
    return list(tags)
10123
10124
10125 class LUSearchTags(NoHooksLU):
10126   """Searches the tags for a given pattern.
10127
10128   """
10129   _OP_PARAMS = [
10130     ("pattern", ht.NoDefault, ht.TNonEmptyString),
10131     ]
10132   REQ_BGL = False
10133
10134   def ExpandNames(self):
10135     self.needed_locks = {}
10136
10137   def CheckPrereq(self):
10138     """Check prerequisites.
10139
10140     This checks the pattern passed for validity by compiling it.
10141
10142     """
10143     try:
10144       self.re = re.compile(self.op.pattern)
10145     except re.error, err:
10146       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
10147                                  (self.op.pattern, err), errors.ECODE_INVAL)
10148
10149   def Exec(self, feedback_fn):
10150     """Returns the tag list.
10151
10152     """
10153     cfg = self.cfg
10154     tgts = [("/cluster", cfg.GetClusterInfo())]
10155     ilist = cfg.GetAllInstancesInfo().values()
10156     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
10157     nlist = cfg.GetAllNodesInfo().values()
10158     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
10159     results = []
10160     for path, target in tgts:
10161       for tag in target.GetTags():
10162         if self.re.search(tag):
10163           results.append((path, tag))
10164     return results
10165
10166
10167 class LUAddTags(TagsLU):
10168   """Sets a tag on a given object.
10169
10170   """
10171   _OP_PARAMS = [
10172     ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
10173     # Name is only meaningful for nodes and instances
10174     ("name", ht.NoDefault, ht.TMaybeString),
10175     ("tags", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
10176     ]
10177   REQ_BGL = False
10178
10179   def CheckPrereq(self):
10180     """Check prerequisites.
10181
10182     This checks the type and length of the tag name and value.
10183
10184     """
10185     TagsLU.CheckPrereq(self)
10186     for tag in self.op.tags:
10187       objects.TaggableObject.ValidateTag(tag)
10188
10189   def Exec(self, feedback_fn):
10190     """Sets the tag.
10191
10192     """
10193     try:
10194       for tag in self.op.tags:
10195         self.target.AddTag(tag)
10196     except errors.TagError, err:
10197       raise errors.OpExecError("Error while setting tag: %s" % str(err))
10198     self.cfg.Update(self.target, feedback_fn)
10199
10200
class LUDelTags(TagsLU):
  """Logical unit removing a list of tags from one object.

  """
  _OP_PARAMS = [
    ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
    # Name is only meaningful for nodes and instances
    ("name", ht.NoDefault, ht.TMaybeString),
    ("tags", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
    ]
  REQ_BGL = False

  def CheckPrereq(self):
    """Validate the tags and make sure the target carries all of them.

    @raise errors.OpPrereqError: if at least one of the tags is not set on
        the target object

    """
    TagsLU.CheckPrereq(self)
    for tag in self.op.tags:
      objects.TaggableObject.ValidateTag(tag)

    requested = frozenset(self.op.tags)
    present = self.target.GetTags()

    missing = requested - present
    if not missing:
      return

    quoted = ["'%s'" % tag for tag in sorted(missing)]
    raise errors.OpPrereqError("Tag(s) %s not found" %
                               (utils.CommaJoin(quoted), ),
                               errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tags and write the object to the configuration.

    """
    target = self.target
    for tag in self.op.tags:
      target.RemoveTag(tag)

    self.cfg.Update(target, feedback_fn)
10239
10240
class LUTestDelay(NoHooksLU):
  """Test logical unit which waits for a given amount of time.

  The delay can be run on the master, on a list of nodes, or on both, and
  it can be repeated a number of times.

  """
  _OP_PARAMS = [
    ("duration", ht.NoDefault, ht.TFloat),
    ("on_master", True, ht.TBool),
    ("on_nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("repeat", 0, ht.TPositiveInt)
    ]
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the node list, if any, and lock exactly those nodes.

    """
    self.needed_locks = {}
    if not self.op.on_nodes:
      return

    # _GetWantedNodes can be used here, but is not always appropriate to use
    # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
    # more information.
    self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
    self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Run a single delay on the master and/or the requested nodes.

    @raise errors.OpExecError: if the delay on the master fails

    """
    duration = self.op.duration

    if self.op.on_master and not utils.TestDelay(duration):
      raise errors.OpExecError("Error during master delay test")

    nodes = self.op.on_nodes
    if nodes:
      per_node = self.rpc.call_test_delay(nodes, duration)
      for (node, node_result) in per_node.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Run the delay once, or as often as the opcode requests.

    """
    count = self.op.repeat
    if count == 0:
      # No repetition requested: a single, unannounced delay
      self._TestDelay()
      return

    last_idx = count - 1
    for idx in range(count):
      self.LogInfo("Test delay iteration %d/%d" % (idx, last_idx))
      self._TestDelay()
10293
10294
10295 class LUTestJobqueue(NoHooksLU):
10296   """Utility LU to test some aspects of the job queue.
10297
10298   """
10299   _OP_PARAMS = [
10300     ("notify_waitlock", False, ht.TBool),
10301     ("notify_exec", False, ht.TBool),
10302     ("log_messages", ht.EmptyList, ht.TListOf(ht.TString)),
10303     ("fail", False, ht.TBool),
10304     ]
10305   REQ_BGL = False
10306
10307   # Must be lower than default timeout for WaitForJobChange to see whether it
10308   # notices changed jobs
10309   _CLIENT_CONNECT_TIMEOUT = 20.0
10310   _CLIENT_CONFIRM_TIMEOUT = 60.0
10311
10312   @classmethod
10313   def _NotifyUsingSocket(cls, cb, errcls):
10314     """Opens a Unix socket and waits for another program to connect.
10315
10316     @type cb: callable
10317     @param cb: Callback to send socket name to client
10318     @type errcls: class
10319     @param errcls: Exception class to use for errors
10320
10321     """
10322     # Using a temporary directory as there's no easy way to create temporary
10323     # sockets without writing a custom loop around tempfile.mktemp and
10324     # socket.bind
10325     tmpdir = tempfile.mkdtemp()
10326     try:
10327       tmpsock = utils.PathJoin(tmpdir, "sock")
10328
10329       logging.debug("Creating temporary socket at %s", tmpsock)
10330       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
10331       try:
10332         sock.bind(tmpsock)
10333         sock.listen(1)
10334
10335         # Send details to client
10336         cb(tmpsock)
10337
10338         # Wait for client to connect before continuing
10339         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
10340         try:
10341           (conn, _) = sock.accept()
10342         except socket.error, err:
10343           raise errcls("Client didn't connect in time (%s)" % err)
10344       finally:
10345         sock.close()
10346     finally:
10347       # Remove as soon as client is connected
10348       shutil.rmtree(tmpdir)
10349
10350     # Wait for client to close
10351     try:
10352       try:
10353         # pylint: disable-msg=E1101
10354         # Instance of '_socketobject' has no ... member
10355         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
10356         conn.recv(1)
10357       except socket.error, err:
10358         raise errcls("Client failed to confirm notification (%s)" % err)
10359     finally:
10360       conn.close()
10361
10362   def _SendNotification(self, test, arg, sockname):
10363     """Sends a notification to the client.
10364
10365     @type test: string
10366     @param test: Test name
10367     @param arg: Test argument (depends on test)
10368     @type sockname: string
10369     @param sockname: Socket path
10370
10371     """
10372     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
10373
10374   def _Notify(self, prereq, test, arg):
10375     """Notifies the client of a test.
10376
10377     @type prereq: bool
10378     @param prereq: Whether this is a prereq-phase test
10379     @type test: string
10380     @param test: Test name
10381     @param arg: Test argument (depends on test)
10382
10383     """
10384     if prereq:
10385       errcls = errors.OpPrereqError
10386     else:
10387       errcls = errors.OpExecError
10388
10389     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
10390                                                   test, arg),
10391                                    errcls)
10392
10393   def CheckArguments(self):
10394     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
10395     self.expandnames_calls = 0
10396
10397   def ExpandNames(self):
10398     checkargs_calls = getattr(self, "checkargs_calls", 0)
10399     if checkargs_calls < 1:
10400       raise errors.ProgrammerError("CheckArguments was not called")
10401
10402     self.expandnames_calls += 1
10403
10404     if self.op.notify_waitlock:
10405       self._Notify(True, constants.JQT_EXPANDNAMES, None)
10406
10407     self.LogInfo("Expanding names")
10408
10409     # Get lock on master node (just to get a lock, not for a particular reason)
10410     self.needed_locks = {
10411       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
10412       }
10413
10414   def Exec(self, feedback_fn):
10415     if self.expandnames_calls < 1:
10416       raise errors.ProgrammerError("ExpandNames was not called")
10417
10418     if self.op.notify_exec:
10419       self._Notify(False, constants.JQT_EXEC, None)
10420
10421     self.LogInfo("Executing")
10422
10423     if self.op.log_messages:
10424       self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
10425       for idx, msg in enumerate(self.op.log_messages):
10426         self.LogInfo("Sending log message %s", idx + 1)
10427         feedback_fn(constants.JQT_MSGPREFIX + msg)
10428         # Report how many test messages have been sent
10429         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
10430
10431     if self.op.fail:
10432       raise errors.OpExecError("Opcode failure was requested")
10433
10434     return True
10435
10436
10437 class IAllocator(object):
10438   """IAllocator framework.
10439
10440   An IAllocator instance has three sets of attributes:
10441     - cfg that is needed to query the cluster
10442     - input data (all members of the _KEYS class attribute are required)
10443     - four buffer attributes (in|out_data|text), that represent the
10444       input (to the external script) in text and data structure format,
10445       and the output from it, again in two formats
10446     - the result variables from the script (success, info, nodes) for
10447       easy usage
10448
10449   """
10450   # pylint: disable-msg=R0902
10451   # lots of instance attributes
10452   _ALLO_KEYS = [
10453     "name", "mem_size", "disks", "disk_template",
10454     "os", "tags", "nics", "vcpus", "hypervisor",
10455     ]
10456   _RELO_KEYS = [
10457     "name", "relocate_from",
10458     ]
10459   _EVAC_KEYS = [
10460     "evac_nodes",
10461     ]
10462
10463   def __init__(self, cfg, rpc, mode, **kwargs):
10464     self.cfg = cfg
10465     self.rpc = rpc
10466     # init buffer variables
10467     self.in_text = self.out_text = self.in_data = self.out_data = None
10468     # init all input fields so that pylint is happy
10469     self.mode = mode
10470     self.mem_size = self.disks = self.disk_template = None
10471     self.os = self.tags = self.nics = self.vcpus = None
10472     self.hypervisor = None
10473     self.relocate_from = None
10474     self.name = None
10475     self.evac_nodes = None
10476     # computed fields
10477     self.required_nodes = None
10478     # init result fields
10479     self.success = self.info = self.result = None
10480     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10481       keyset = self._ALLO_KEYS
10482       fn = self._AddNewInstance
10483     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10484       keyset = self._RELO_KEYS
10485       fn = self._AddRelocateInstance
10486     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10487       keyset = self._EVAC_KEYS
10488       fn = self._AddEvacuateNodes
10489     else:
10490       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
10491                                    " IAllocator" % self.mode)
10492     for key in kwargs:
10493       if key not in keyset:
10494         raise errors.ProgrammerError("Invalid input parameter '%s' to"
10495                                      " IAllocator" % key)
10496       setattr(self, key, kwargs[key])
10497
10498     for key in keyset:
10499       if key not in kwargs:
10500         raise errors.ProgrammerError("Missing input parameter '%s' to"
10501                                      " IAllocator" % key)
10502     self._BuildInputData(fn)
10503
10504   def _ComputeClusterData(self):
10505     """Compute the generic allocator input data.
10506
10507     This is the data that is independent of the actual operation.
10508
10509     """
10510     cfg = self.cfg
10511     cluster_info = cfg.GetClusterInfo()
10512     # cluster data
10513     data = {
10514       "version": constants.IALLOCATOR_VERSION,
10515       "cluster_name": cfg.GetClusterName(),
10516       "cluster_tags": list(cluster_info.GetTags()),
10517       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
10518       # we don't have job IDs
10519       }
10520     iinfo = cfg.GetAllInstancesInfo().values()
10521     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
10522
10523     # node data
10524     node_list = cfg.GetNodeList()
10525
10526     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10527       hypervisor_name = self.hypervisor
10528     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10529       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
10530     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10531       hypervisor_name = cluster_info.enabled_hypervisors[0]
10532
10533     node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
10534                                         hypervisor_name)
10535     node_iinfo = \
10536       self.rpc.call_all_instances_info(node_list,
10537                                        cluster_info.enabled_hypervisors)
10538
10539     data["nodegroups"] = self._ComputeNodeGroupData(cfg)
10540
10541     data["nodes"] = self._ComputeNodeData(cfg, node_data, node_iinfo, i_list)
10542
10543     data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
10544
10545     self.in_data = data
10546
10547   @staticmethod
10548   def _ComputeNodeGroupData(cfg):
10549     """Compute node groups data.
10550
10551     """
10552     ng = {}
10553     for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
10554       ng[guuid] = { "name": gdata.name }
10555     return ng
10556
10557   @staticmethod
10558   def _ComputeNodeData(cfg, node_data, node_iinfo, i_list):
10559     """Compute global node data.
10560
10561     """
10562     node_results = {}
10563     for nname, nresult in node_data.items():
10564       # first fill in static (config-based) values
10565       ninfo = cfg.GetNodeInfo(nname)
10566       pnr = {
10567         "tags": list(ninfo.GetTags()),
10568         "primary_ip": ninfo.primary_ip,
10569         "secondary_ip": ninfo.secondary_ip,
10570         "offline": ninfo.offline,
10571         "drained": ninfo.drained,
10572         "master_candidate": ninfo.master_candidate,
10573         "group": ninfo.group,
10574         "master_capable": ninfo.master_capable,
10575         "vm_capable": ninfo.vm_capable,
10576         }
10577
10578       if not (ninfo.offline or ninfo.drained):
10579         nresult.Raise("Can't get data for node %s" % nname)
10580         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
10581                                 nname)
10582         remote_info = nresult.payload
10583
10584         for attr in ['memory_total', 'memory_free', 'memory_dom0',
10585                      'vg_size', 'vg_free', 'cpu_total']:
10586           if attr not in remote_info:
10587             raise errors.OpExecError("Node '%s' didn't return attribute"
10588                                      " '%s'" % (nname, attr))
10589           if not isinstance(remote_info[attr], int):
10590             raise errors.OpExecError("Node '%s' returned invalid value"
10591                                      " for '%s': %s" %
10592                                      (nname, attr, remote_info[attr]))
10593         # compute memory used by primary instances
10594         i_p_mem = i_p_up_mem = 0
10595         for iinfo, beinfo in i_list:
10596           if iinfo.primary_node == nname:
10597             i_p_mem += beinfo[constants.BE_MEMORY]
10598             if iinfo.name not in node_iinfo[nname].payload:
10599               i_used_mem = 0
10600             else:
10601               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
10602             i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
10603             remote_info['memory_free'] -= max(0, i_mem_diff)
10604
10605             if iinfo.admin_up:
10606               i_p_up_mem += beinfo[constants.BE_MEMORY]
10607
10608         # compute memory used by instances
10609         pnr_dyn = {
10610           "total_memory": remote_info['memory_total'],
10611           "reserved_memory": remote_info['memory_dom0'],
10612           "free_memory": remote_info['memory_free'],
10613           "total_disk": remote_info['vg_size'],
10614           "free_disk": remote_info['vg_free'],
10615           "total_cpus": remote_info['cpu_total'],
10616           "i_pri_memory": i_p_mem,
10617           "i_pri_up_memory": i_p_up_mem,
10618           }
10619         pnr.update(pnr_dyn)
10620
10621       node_results[nname] = pnr
10622
10623     return node_results
10624
10625   @staticmethod
10626   def _ComputeInstanceData(cluster_info, i_list):
10627     """Compute global instance data.
10628
10629     """
10630     instance_data = {}
10631     for iinfo, beinfo in i_list:
10632       nic_data = []
10633       for nic in iinfo.nics:
10634         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
10635         nic_dict = {"mac": nic.mac,
10636                     "ip": nic.ip,
10637                     "mode": filled_params[constants.NIC_MODE],
10638                     "link": filled_params[constants.NIC_LINK],
10639                    }
10640         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
10641           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
10642         nic_data.append(nic_dict)
10643       pir = {
10644         "tags": list(iinfo.GetTags()),
10645         "admin_up": iinfo.admin_up,
10646         "vcpus": beinfo[constants.BE_VCPUS],
10647         "memory": beinfo[constants.BE_MEMORY],
10648         "os": iinfo.os,
10649         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
10650         "nics": nic_data,
10651         "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
10652         "disk_template": iinfo.disk_template,
10653         "hypervisor": iinfo.hypervisor,
10654         }
10655       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
10656                                                  pir["disks"])
10657       instance_data[iinfo.name] = pir
10658
10659     return instance_data
10660
10661   def _AddNewInstance(self):
10662     """Add new instance data to allocator structure.
10663
10664     This in combination with _AllocatorGetClusterData will create the
10665     correct structure needed as input for the allocator.
10666
10667     The checks for the completeness of the opcode must have already been
10668     done.
10669
10670     """
10671     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
10672
10673     if self.disk_template in constants.DTS_NET_MIRROR:
10674       self.required_nodes = 2
10675     else:
10676       self.required_nodes = 1
10677     request = {
10678       "name": self.name,
10679       "disk_template": self.disk_template,
10680       "tags": self.tags,
10681       "os": self.os,
10682       "vcpus": self.vcpus,
10683       "memory": self.mem_size,
10684       "disks": self.disks,
10685       "disk_space_total": disk_space,
10686       "nics": self.nics,
10687       "required_nodes": self.required_nodes,
10688       }
10689     return request
10690
10691   def _AddRelocateInstance(self):
10692     """Add relocate instance data to allocator structure.
10693
10694     This in combination with _IAllocatorGetClusterData will create the
10695     correct structure needed as input for the allocator.
10696
10697     The checks for the completeness of the opcode must have already been
10698     done.
10699
10700     """
10701     instance = self.cfg.GetInstanceInfo(self.name)
10702     if instance is None:
10703       raise errors.ProgrammerError("Unknown instance '%s' passed to"
10704                                    " IAllocator" % self.name)
10705
10706     if instance.disk_template not in constants.DTS_NET_MIRROR:
10707       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
10708                                  errors.ECODE_INVAL)
10709
10710     if len(instance.secondary_nodes) != 1:
10711       raise errors.OpPrereqError("Instance has not exactly one secondary node",
10712                                  errors.ECODE_STATE)
10713
10714     self.required_nodes = 1
10715     disk_sizes = [{'size': disk.size} for disk in instance.disks]
10716     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
10717
10718     request = {
10719       "name": self.name,
10720       "disk_space_total": disk_space,
10721       "required_nodes": self.required_nodes,
10722       "relocate_from": self.relocate_from,
10723       }
10724     return request
10725
10726   def _AddEvacuateNodes(self):
10727     """Add evacuate nodes data to allocator structure.
10728
10729     """
10730     request = {
10731       "evac_nodes": self.evac_nodes
10732       }
10733     return request
10734
10735   def _BuildInputData(self, fn):
10736     """Build input data structures.
10737
10738     """
10739     self._ComputeClusterData()
10740
10741     request = fn()
10742     request["type"] = self.mode
10743     self.in_data["request"] = request
10744
10745     self.in_text = serializer.Dump(self.in_data)
10746
10747   def Run(self, name, validate=True, call_fn=None):
10748     """Run an instance allocator and return the results.
10749
10750     """
10751     if call_fn is None:
10752       call_fn = self.rpc.call_iallocator_runner
10753
10754     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
10755     result.Raise("Failure while running the iallocator script")
10756
10757     self.out_text = result.payload
10758     if validate:
10759       self._ValidateResult()
10760
10761   def _ValidateResult(self):
10762     """Process the allocator results.
10763
10764     This will process and if successful save the result in
10765     self.out_data and the other parameters.
10766
10767     """
10768     try:
10769       rdict = serializer.Load(self.out_text)
10770     except Exception, err:
10771       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
10772
10773     if not isinstance(rdict, dict):
10774       raise errors.OpExecError("Can't parse iallocator results: not a dict")
10775
10776     # TODO: remove backwards compatiblity in later versions
10777     if "nodes" in rdict and "result" not in rdict:
10778       rdict["result"] = rdict["nodes"]
10779       del rdict["nodes"]
10780
10781     for key in "success", "info", "result":
10782       if key not in rdict:
10783         raise errors.OpExecError("Can't parse iallocator results:"
10784                                  " missing key '%s'" % key)
10785       setattr(self, key, rdict[key])
10786
10787     if not isinstance(rdict["result"], list):
10788       raise errors.OpExecError("Can't parse iallocator results: 'result' key"
10789                                " is not a list")
10790     self.out_data = rdict
10791
10792
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  Depending on the direction, this LU either returns the input text which
  would be passed to an allocator script, or runs the named script and
  returns its raw output.

  """
  _OP_PARAMS = [
    ("direction", ht.NoDefault,
     ht.TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
    ("mode", ht.NoDefault, ht.TElemOf(constants.VALID_IALLOCATOR_MODES)),
    ("name", ht.NoDefault, ht.TNonEmptyString),
    ("nics", ht.NoDefault, ht.TOr(ht.TNone, ht.TListOf(
      ht.TDictOf(ht.TElemOf(["mac", "ip", "bridge"]),
               ht.TOr(ht.TNone, ht.TNonEmptyString))))),
    ("disks", ht.NoDefault, ht.TOr(ht.TNone, ht.TList)),
    ("hypervisor", None, ht.TMaybeString),
    ("allocator", None, ht.TMaybeString),
    ("tags", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
    ("mem_size", None, ht.TOr(ht.TNone, ht.TPositiveInt)),
    ("vcpus", None, ht.TOr(ht.TNone, ht.TPositiveInt)),
    ("os", None, ht.TMaybeString),
    ("disk_template", None, ht.TMaybeString),
    ("evac_nodes", None, ht.TOr(ht.TNone, ht.TListOf(ht.TNonEmptyString))),
    ]

  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    @raise errors.OpPrereqError: if a parameter needed by the mode is
        missing or invalid, if the instance to allocate already exists, if
        the mode or direction is unknown, or if no allocator name was given
        for the "out" direction

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      # An allocation test only makes sense for a name which is not in use
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      # Every disk must be a dict with an integer size and a valid mode
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      # Remembered for Exec, which passes it on to the allocator
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    @raise errors.ProgrammerError: if the mode is not one of the known
        allocator modes
    @return: the allocator input text for the "in" direction, otherwise the
        unvalidated output text of the allocator script

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      # The mode has to be interpolated with "%"; passing it as a second
      # argument would leave a literal "%s" in the exception message
      raise errors.ProgrammerError("Uncatched mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      # The raw script output is what gets returned, so skip validation
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result