code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable-msg=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay to many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import platform
  36 import logging
  37 import copy
  38 import OpenSSL
  39 import socket
  40 import tempfile
  41 import shutil
  42
  43 from ganeti import ssh
  44 from ganeti import utils
  45 from ganeti import errors
  46 from ganeti import hypervisor
  47 from ganeti import locking
  48 from ganeti import constants
  49 from ganeti import objects
  50 from ganeti import serializer
  51 from ganeti import ssconf
  52 from ganeti import uidpool
  53 from ganeti import compat
  54 from ganeti import masterd
  55 from ganeti import netutils
  56 from ganeti import ht
  57
  58 import ganeti.masterd.instance # pylint: disable-msg=W0611
  59
  60 # Common opcode attributes
  61
  62 #: output fields for a query operation
  63 _POutputFields = ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString))
  64
  65
  66 #: the shutdown timeout
  67 _PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
  68                      ht.TPositiveInt)
  69
  70 #: the force parameter
  71 _PForce = ("force", False, ht.TBool)
  72
  73 #: a required instance name (for single-instance LUs)
  74 _PInstanceName = ("instance_name", ht.NoDefault, ht.TNonEmptyString)
  75
  76 #: Whether to ignore offline nodes
  77 _PIgnoreOfflineNodes = ("ignore_offline_nodes", False, ht.TBool)
  78
  79 #: a required node name (for single-node LUs)
  80 _PNodeName = ("node_name", ht.NoDefault, ht.TNonEmptyString)
  81
  82 #: the migration type (live/non-live)
  83 _PMigrationMode = ("mode", None,
  84                    ht.TOr(ht.TNone, ht.TElemOf(constants.HT_MIGRATION_MODES)))
  85
  86 #: the obsolete 'live' mode (boolean)
  87 _PMigrationLive = ("live", None, ht.TMaybeBool)
  88
  89
  90 # End types
  91 class LogicalUnit(object):
  92   """Logical Unit base class.
  93
  94   Subclasses must follow these rules:
  95     - implement ExpandNames
  96     - implement CheckPrereq (except when tasklets are used)
  97     - implement Exec (except when tasklets are used)
  98     - implement BuildHooksEnv
  99     - redefine HPATH and HTYPE
 100     - optionally redefine their run requirements:
 101         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
 102
 103   Note that all commands require root permissions.
 104
 105   @ivar dry_run_result: the value (if any) that will be returned to the caller
 106       in dry-run mode (signalled by opcode dry_run parameter)
 107   @cvar _OP_PARAMS: a list of opcode attributes, their defaults values
 108       they should get if not already defined, and types they must match
 109
 110   """
 111   HPATH = None
 112   HTYPE = None
 113   _OP_PARAMS = []
 114   REQ_BGL = True
 115
 116   def __init__(self, processor, op, context, rpc):
 117     """Constructor for LogicalUnit.
 118
 119     This needs to be overridden in derived classes in order to check op
 120     validity.
 121
 122     """
 123     self.proc = processor
 124     self.op = op
 125     self.cfg = context.cfg
 126     self.context = context
 127     self.rpc = rpc
 128     # Dicts used to declare locking needs to mcpu
 129     self.needed_locks = None
 130     self.acquired_locks = {}
 131     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 132     self.add_locks = {}
 133     self.remove_locks = {}
 134     # Used to force good behavior when calling helper functions
 135     self.recalculate_locks = {}
 136     self.__ssh = None
 137     # logging
 138     self.Log = processor.Log # pylint: disable-msg=C0103
 139     self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
 140     self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
 141     self.LogStep = processor.LogStep # pylint: disable-msg=C0103
 142     # support for dry-run
 143     self.dry_run_result = None
 144     # support for generic debug attribute
 145     if (not hasattr(self.op, "debug_level") or
 146         not isinstance(self.op.debug_level, int)):
 147       self.op.debug_level = 0
 148
 149     # Tasklets
 150     self.tasklets = None
 151
 152     # The new kind-of-type-system
 153     op_id = self.op.OP_ID
 154     for attr_name, aval, test in self._OP_PARAMS:
 155       if not hasattr(op, attr_name):
 156         if aval == ht.NoDefault:
 157           raise errors.OpPrereqError("Required parameter '%s.%s' missing" %
 158                                      (op_id, attr_name), errors.ECODE_INVAL)
 159         else:
 160           if callable(aval):
 161             dval = aval()
 162           else:
 163             dval = aval
 164           setattr(self.op, attr_name, dval)
 165       attr_val = getattr(op, attr_name)
 166       if test == ht.NoType:
 167         # no tests here
 168         continue
 169       if not callable(test):
 170         raise errors.ProgrammerError("Validation for parameter '%s.%s' failed,"
 171                                      " given type is not a proper type (%s)" %
 172                                      (op_id, attr_name, test))
 173       if not test(attr_val):
 174         logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
 175                       self.op.OP_ID, attr_name, type(attr_val), attr_val)
 176         raise errors.OpPrereqError("Parameter '%s.%s' fails validation" %
 177                                    (op_id, attr_name), errors.ECODE_INVAL)
 178
 179     self.CheckArguments()
 180
 181   def __GetSSH(self):
 182     """Returns the SshRunner object
 183
 184     """
 185     if not self.__ssh:
 186       self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
 187     return self.__ssh
 188
 189   ssh = property(fget=__GetSSH)
 190
 191   def CheckArguments(self):
 192     """Check syntactic validity for the opcode arguments.
 193
 194     This method is for doing a simple syntactic check and ensure
 195     validity of opcode parameters, without any cluster-related
 196     checks. While the same can be accomplished in ExpandNames and/or
 197     CheckPrereq, doing these separate is better because:
 198
 199       - ExpandNames is left as as purely a lock-related function
 200       - CheckPrereq is run after we have acquired locks (and possible
 201         waited for them)
 202
 203     The function is allowed to change the self.op attribute so that
 204     later methods can no longer worry about missing parameters.
 205
 206     """
 207     pass
 208
 209   def ExpandNames(self):
 210     """Expand names for this LU.
 211
 212     This method is called before starting to execute the opcode, and it should
 213     update all the parameters of the opcode to their canonical form (e.g. a
 214     short node name must be fully expanded after this method has successfully
 215     completed). This way locking, hooks, logging, ecc. can work correctly.
 216
 217     LUs which implement this method must also populate the self.needed_locks
 218     member, as a dict with lock levels as keys, and a list of needed lock names
 219     as values. Rules:
 220
 221       - use an empty dict if you don't need any lock
 222       - if you don't need any lock at a particular level omit that level
 223       - don't put anything for the BGL level
 224       - if you want all locks at a level use locking.ALL_SET as a value
 225
 226     If you need to share locks (rather than acquire them exclusively) at one
 227     level you can modify self.share_locks, setting a true value (usually 1) for
 228     that level. By default locks are not shared.
 229
 230     This function can also define a list of tasklets, which then will be
 231     executed in order instead of the usual LU-level CheckPrereq and Exec
 232     functions, if those are not defined by the LU.
 233
 234     Examples::
 235
 236       # Acquire all nodes and one instance
 237       self.needed_locks = {
 238         locking.LEVEL_NODE: locking.ALL_SET,
 239         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 240       }
 241       # Acquire just two nodes
 242       self.needed_locks = {
 243         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 244       }
 245       # Acquire no locks
 246       self.needed_locks = {} # No, you can't leave it to the default value None
 247
 248     """
 249     # The implementation of this method is mandatory only if the new LU is
 250     # concurrent, so that old LUs don't need to be changed all at the same
 251     # time.
 252     if self.REQ_BGL:
 253       self.needed_locks = {} # Exclusive LUs don't need locks.
 254     else:
 255       raise NotImplementedError
 256
 257   def DeclareLocks(self, level):
 258     """Declare LU locking needs for a level
 259
 260     While most LUs can just declare their locking needs at ExpandNames time,
 261     sometimes there's the need to calculate some locks after having acquired
 262     the ones before. This function is called just before acquiring locks at a
 263     particular level, but after acquiring the ones at lower levels, and permits
 264     such calculations. It can be used to modify self.needed_locks, and by
 265     default it does nothing.
 266
 267     This function is only called if you have something already set in
 268     self.needed_locks for the level.
 269
 270     @param level: Locking level which is going to be locked
 271     @type level: member of ganeti.locking.LEVELS
 272
 273     """
 274
 275   def CheckPrereq(self):
 276     """Check prerequisites for this LU.
 277
 278     This method should check that the prerequisites for the execution
 279     of this LU are fulfilled. It can do internode communication, but
 280     it should be idempotent - no cluster or system changes are
 281     allowed.
 282
 283     The method should raise errors.OpPrereqError in case something is
 284     not fulfilled. Its return value is ignored.
 285
 286     This method should also update all the parameters of the opcode to
 287     their canonical form if it hasn't been done by ExpandNames before.
 288
 289     """
 290     if self.tasklets is not None:
 291       for (idx, tl) in enumerate(self.tasklets):
 292         logging.debug("Checking prerequisites for tasklet %s/%s",
 293                       idx + 1, len(self.tasklets))
 294         tl.CheckPrereq()
 295     else:
 296       pass
 297
 298   def Exec(self, feedback_fn):
 299     """Execute the LU.
 300
 301     This method should implement the actual work. It should raise
 302     errors.OpExecError for failures that are somewhat dealt with in
 303     code, or expected.
 304
 305     """
 306     if self.tasklets is not None:
 307       for (idx, tl) in enumerate(self.tasklets):
 308         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 309         tl.Exec(feedback_fn)
 310     else:
 311       raise NotImplementedError
 312
 313   def BuildHooksEnv(self):
 314     """Build hooks environment for this LU.
 315
 316     This method should return a three-node tuple consisting of: a dict
 317     containing the environment that will be used for running the
 318     specific hook for this LU, a list of node names on which the hook
 319     should run before the execution, and a list of node names on which
 320     the hook should run after the execution.
 321
 322     The keys of the dict must not have 'GANETI_' prefixed as this will
 323     be handled in the hooks runner. Also note additional keys will be
 324     added by the hooks runner. If the LU doesn't define any
 325     environment, an empty dict (and not None) should be returned.
 326
 327     No nodes should be returned as an empty list (and not None).
 328
 329     Note that if the HPATH for a LU class is None, this function will
 330     not be called.
 331
 332     """
 333     raise NotImplementedError
 334
 335   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 336     """Notify the LU about the results of its hooks.
 337
 338     This method is called every time a hooks phase is executed, and notifies
 339     the Logical Unit about the hooks' result. The LU can then use it to alter
 340     its result based on the hooks.  By default the method does nothing and the
 341     previous result is passed back unchanged but any LU can define it if it
 342     wants to use the local cluster hook-scripts somehow.
 343
 344     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 345         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 346     @param hook_results: the results of the multi-node hooks rpc call
 347     @param feedback_fn: function used send feedback back to the caller
 348     @param lu_result: the previous Exec result this LU had, or None
 349         in the PRE phase
 350     @return: the new Exec result, based on the previous result
 351         and hook results
 352
 353     """
 354     # API must be kept, thus we ignore the unused argument and could
 355     # be a function warnings
 356     # pylint: disable-msg=W0613,R0201
 357     return lu_result
 358
 359   def _ExpandAndLockInstance(self):
 360     """Helper function to expand and lock an instance.
 361
 362     Many LUs that work on an instance take its name in self.op.instance_name
 363     and need to expand it and then declare the expanded name for locking. This
 364     function does it, and then updates self.op.instance_name to the expanded
 365     name. It also initializes needed_locks as a dict, if this hasn't been done
 366     before.
 367
 368     """
 369     if self.needed_locks is None:
 370       self.needed_locks = {}
 371     else:
 372       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 373         "_ExpandAndLockInstance called with instance-level locks set"
 374     self.op.instance_name = _ExpandInstanceName(self.cfg,
 375                                                 self.op.instance_name)
 376     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 377
 378   def _LockInstancesNodes(self, primary_only=False):
 379     """Helper function to declare instances' nodes for locking.
 380
 381     This function should be called after locking one or more instances to lock
 382     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 383     with all primary or secondary nodes for instances already locked and
 384     present in self.needed_locks[locking.LEVEL_INSTANCE].
 385
 386     It should be called from DeclareLocks, and for safety only works if
 387     self.recalculate_locks[locking.LEVEL_NODE] is set.
 388
 389     In the future it may grow parameters to just lock some instance's nodes, or
 390     to just lock primaries or secondary nodes, if needed.
 391
 392     If should be called in DeclareLocks in a way similar to::
 393
 394       if level == locking.LEVEL_NODE:
 395         self._LockInstancesNodes()
 396
 397     @type primary_only: boolean
 398     @param primary_only: only lock primary nodes of locked instances
 399
 400     """
 401     assert locking.LEVEL_NODE in self.recalculate_locks, \
 402       "_LockInstancesNodes helper function called with no nodes to recalculate"
 403
 404     # TODO: check if we're really been called with the instance locks held
 405
 406     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 407     # future we might want to have different behaviors depending on the value
 408     # of self.recalculate_locks[locking.LEVEL_NODE]
 409     wanted_nodes = []
 410     for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
 411       instance = self.context.cfg.GetInstanceInfo(instance_name)
 412       wanted_nodes.append(instance.primary_node)
 413       if not primary_only:
 414         wanted_nodes.extend(instance.secondary_nodes)
 415
 416     if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
 417       self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
 418     elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
 419       self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
 420
 421     del self.recalculate_locks[locking.LEVEL_NODE]
 422
 423
 424 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
 425   """Simple LU which runs no hooks.
 426
 427   This LU is intended as a parent for other LogicalUnits which will
 428   run no hooks, in order to reduce duplicate code.
 429
 430   """
 431   HPATH = None
 432   HTYPE = None
 433
 434   def BuildHooksEnv(self):
 435     """Empty BuildHooksEnv for NoHooksLu.
 436
 437     This just raises an error.
 438
 439     """
 440     assert False, "BuildHooksEnv called for NoHooksLUs"
 441
 442
 443 class Tasklet:
 444   """Tasklet base class.
 445
 446   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 447   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 448   tasklets know nothing about locks.
 449
 450   Subclasses must follow these rules:
 451     - Implement CheckPrereq
 452     - Implement Exec
 453
 454   """
 455   def __init__(self, lu):
 456     self.lu = lu
 457
 458     # Shortcuts
 459     self.cfg = lu.cfg
 460     self.rpc = lu.rpc
 461
 462   def CheckPrereq(self):
 463     """Check prerequisites for this tasklets.
 464
 465     This method should check whether the prerequisites for the execution of
 466     this tasklet are fulfilled. It can do internode communication, but it
 467     should be idempotent - no cluster or system changes are allowed.
 468
 469     The method should raise errors.OpPrereqError in case something is not
 470     fulfilled. Its return value is ignored.
 471
 472     This method should also update all parameters to their canonical form if it
 473     hasn't been done before.
 474
 475     """
 476     pass
 477
 478   def Exec(self, feedback_fn):
 479     """Execute the tasklet.
 480
 481     This method should implement the actual work. It should raise
 482     errors.OpExecError for failures that are somewhat dealt with in code, or
 483     expected.
 484
 485     """
 486     raise NotImplementedError
 487
 488
 489 def _GetWantedNodes(lu, nodes):
 490   """Returns list of checked and expanded node names.
 491
 492   @type lu: L{LogicalUnit}
 493   @param lu: the logical unit on whose behalf we execute
 494   @type nodes: list
 495   @param nodes: list of node names or None for all nodes
 496   @rtype: list
 497   @return: the list of nodes, sorted
 498   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 499
 500   """
 501   if not nodes:
 502     raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
 503       " non-empty list of nodes whose name is to be expanded.")
 504
 505   wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
 506   return utils.NiceSort(wanted)
 507
 508
 509 def _GetWantedInstances(lu, instances):
 510   """Returns list of checked and expanded instance names.
 511
 512   @type lu: L{LogicalUnit}
 513   @param lu: the logical unit on whose behalf we execute
 514   @type instances: list
 515   @param instances: list of instance names or None for all instances
 516   @rtype: list
 517   @return: the list of instances, sorted
 518   @raise errors.OpPrereqError: if the instances parameter is wrong type
 519   @raise errors.OpPrereqError: if any of the passed instances is not found
 520
 521   """
 522   if instances:
 523     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 524   else:
 525     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 526   return wanted
 527
 528
 529 def _GetUpdatedParams(old_params, update_dict,
 530                       use_default=True, use_none=False):
 531   """Return the new version of a parameter dictionary.
 532
 533   @type old_params: dict
 534   @param old_params: old parameters
 535   @type update_dict: dict
 536   @param update_dict: dict containing new parameter values, or
 537       constants.VALUE_DEFAULT to reset the parameter to its default
 538       value
 539   @param use_default: boolean
 540   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 541       values as 'to be deleted' values
 542   @param use_none: boolean
 543   @type use_none: whether to recognise C{None} values as 'to be
 544       deleted' values
 545   @rtype: dict
 546   @return: the new parameter dictionary
 547
 548   """
 549   params_copy = copy.deepcopy(old_params)
 550   for key, val in update_dict.iteritems():
 551     if ((use_default and val == constants.VALUE_DEFAULT) or
 552         (use_none and val is None)):
 553       try:
 554         del params_copy[key]
 555       except KeyError:
 556         pass
 557     else:
 558       params_copy[key] = val
 559   return params_copy
 560
 561
 562 def _CheckOutputFields(static, dynamic, selected):
 563   """Checks whether all selected fields are valid.
 564
 565   @type static: L{utils.FieldSet}
 566   @param static: static fields set
 567   @type dynamic: L{utils.FieldSet}
 568   @param dynamic: dynamic fields set
 569
 570   """
 571   f = utils.FieldSet()
 572   f.Extend(static)
 573   f.Extend(dynamic)
 574
 575   delta = f.NonMatching(selected)
 576   if delta:
 577     raise errors.OpPrereqError("Unknown output fields selected: %s"
 578                                % ",".join(delta), errors.ECODE_INVAL)
 579
 580
 581 def _CheckGlobalHvParams(params):
 582   """Validates that given hypervisor params are not global ones.
 583
 584   This will ensure that instances don't get customised versions of
 585   global params.
 586
 587   """
 588   used_globals = constants.HVC_GLOBALS.intersection(params)
 589   if used_globals:
 590     msg = ("The following hypervisor parameters are global and cannot"
 591            " be customized at instance level, please modify them at"
 592            " cluster level: %s" % utils.CommaJoin(used_globals))
 593     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 594
 595
 596 def _CheckNodeOnline(lu, node):
 597   """Ensure that a given node is online.
 598
 599   @param lu: the LU on behalf of which we make the check
 600   @param node: the node to check
 601   @raise errors.OpPrereqError: if the node is offline
 602
 603   """
 604   if lu.cfg.GetNodeInfo(node).offline:
 605     raise errors.OpPrereqError("Can't use offline node %s" % node,
 606                                errors.ECODE_INVAL)
 607
 608
 609 def _CheckNodeNotDrained(lu, node):
 610   """Ensure that a given node is not drained.
 611
 612   @param lu: the LU on behalf of which we make the check
 613   @param node: the node to check
 614   @raise errors.OpPrereqError: if the node is drained
 615
 616   """
 617   if lu.cfg.GetNodeInfo(node).drained:
 618     raise errors.OpPrereqError("Can't use drained node %s" % node,
 619                                errors.ECODE_INVAL)
 620
 621
 622 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 623   """Ensure that a node supports a given OS.
 624
 625   @param lu: the LU on behalf of which we make the check
 626   @param node: the node to check
 627   @param os_name: the OS to query about
 628   @param force_variant: whether to ignore variant errors
 629   @raise errors.OpPrereqError: if the node is not supporting the OS
 630
 631   """
 632   result = lu.rpc.call_os_get(node, os_name)
 633   result.Raise("OS '%s' not in supported OS list for node %s" %
 634                (os_name, node),
 635                prereq=True, ecode=errors.ECODE_INVAL)
 636   if not force_variant:
 637     _CheckOSVariant(result.payload, os_name)
 638
 639
 640 def _RequireFileStorage():
 641   """Checks that file storage is enabled.
 642
 643   @raise errors.OpPrereqError: when file storage is disabled
 644
 645   """
 646   if not constants.ENABLE_FILE_STORAGE:
 647     raise errors.OpPrereqError("File storage disabled at configure time",
 648                                errors.ECODE_INVAL)
 649
 650
 651 def _CheckDiskTemplate(template):
 652   """Ensure a given disk template is valid.
 653
 654   """
 655   if template not in constants.DISK_TEMPLATES:
 656     msg = ("Invalid disk template name '%s', valid templates are: %s" %
 657            (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
 658     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 659   if template == constants.DT_FILE:
 660     _RequireFileStorage()
 661   return True
 662
 663
 664 def _CheckStorageType(storage_type):
 665   """Ensure a given storage type is valid.
 666
 667   """
 668   if storage_type not in constants.VALID_STORAGE_TYPES:
 669     raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
 670                                errors.ECODE_INVAL)
 671   if storage_type == constants.ST_FILE:
 672     _RequireFileStorage()
 673   return True
 674
 675
 676 def _GetClusterDomainSecret():
 677   """Reads the cluster domain secret.
 678
 679   """
 680   return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
 681                                strict=True)
 682
 683
 684 def _CheckInstanceDown(lu, instance, reason):
 685   """Ensure that an instance is not running."""
 686   if instance.admin_up:
 687     raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
 688                                (instance.name, reason), errors.ECODE_STATE)
 689
 690   pnode = instance.primary_node
 691   ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
 692   ins_l.Raise("Can't contact node %s for instance information" % pnode,
 693               prereq=True, ecode=errors.ECODE_ENVIRON)
 694
 695   if instance.name in ins_l.payload:
 696     raise errors.OpPrereqError("Instance %s is running, %s" %
 697                                (instance.name, reason), errors.ECODE_STATE)
 698
 699
 700 def _ExpandItemName(fn, name, kind):
 701   """Expand an item name.
 702
 703   @param fn: the function to use for expansion
 704   @param name: requested item name
 705   @param kind: text description ('Node' or 'Instance')
 706   @return: the resolved (full) name
 707   @raise errors.OpPrereqError: if the item is not found
 708
 709   """
 710   full_name = fn(name)
 711   if full_name is None:
 712     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
 713                                errors.ECODE_NOENT)
 714   return full_name
 715
 716
 717 def _ExpandNodeName(cfg, name):
 718   """Wrapper over L{_ExpandItemName} for nodes."""
 719   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
 720
 721
 722 def _ExpandInstanceName(cfg, name):
 723   """Wrapper over L{_ExpandItemName} for instance."""
 724   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
 725
 726
 727 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 728                           memory, vcpus, nics, disk_template, disks,
 729                           bep, hvp, hypervisor_name):
 730   """Builds instance related env variables for hooks
 731
 732   This builds the hook environment from individual variables.
 733
 734   @type name: string
 735   @param name: the name of the instance
 736   @type primary_node: string
 737   @param primary_node: the name of the instance's primary node
 738   @type secondary_nodes: list
 739   @param secondary_nodes: list of secondary nodes as strings
 740   @type os_type: string
 741   @param os_type: the name of the instance's OS
 742   @type status: boolean
 743   @param status: the should_run status of the instance
 744   @type memory: string
 745   @param memory: the memory size of the instance
 746   @type vcpus: string
 747   @param vcpus: the count of VCPUs the instance has
 748   @type nics: list
 749   @param nics: list of tuples (ip, mac, mode, link) representing
 750       the NICs the instance has
 751   @type disk_template: string
 752   @param disk_template: the disk template of the instance
 753   @type disks: list
 754   @param disks: the list of (size, mode) pairs
 755   @type bep: dict
 756   @param bep: the backend parameters for the instance
 757   @type hvp: dict
 758   @param hvp: the hypervisor parameters for the instance
 759   @type hypervisor_name: string
 760   @param hypervisor_name: the hypervisor for the instance
 761   @rtype: dict
 762   @return: the hook environment for this instance
 763
 764   """
 765   if status:
 766     str_status = "up"
 767   else:
 768     str_status = "down"
 769   env = {
 770     "OP_TARGET": name,
 771     "INSTANCE_NAME": name,
 772     "INSTANCE_PRIMARY": primary_node,
 773     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
 774     "INSTANCE_OS_TYPE": os_type,
 775     "INSTANCE_STATUS": str_status,
 776     "INSTANCE_MEMORY": memory,
 777     "INSTANCE_VCPUS": vcpus,
 778     "INSTANCE_DISK_TEMPLATE": disk_template,
 779     "INSTANCE_HYPERVISOR": hypervisor_name,
 780   }
 781
 782   if nics:
 783     nic_count = len(nics)
 784     for idx, (ip, mac, mode, link) in enumerate(nics):
 785       if ip is None:
 786         ip = ""
 787       env["INSTANCE_NIC%d_IP" % idx] = ip
 788       env["INSTANCE_NIC%d_MAC" % idx] = mac
 789       env["INSTANCE_NIC%d_MODE" % idx] = mode
 790       env["INSTANCE_NIC%d_LINK" % idx] = link
 791       if mode == constants.NIC_MODE_BRIDGED:
 792         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
 793   else:
 794     nic_count = 0
 795
 796   env["INSTANCE_NIC_COUNT"] = nic_count
 797
 798   if disks:
 799     disk_count = len(disks)
 800     for idx, (size, mode) in enumerate(disks):
 801       env["INSTANCE_DISK%d_SIZE" % idx] = size
 802       env["INSTANCE_DISK%d_MODE" % idx] = mode
 803   else:
 804     disk_count = 0
 805
 806   env["INSTANCE_DISK_COUNT"] = disk_count
 807
 808   for source, kind in [(bep, "BE"), (hvp, "HV")]:
 809     for key, value in source.items():
 810       env["INSTANCE_%s_%s" % (kind, key)] = value
 811
 812   return env
 813
 814
 815 def _NICListToTuple(lu, nics):
 816   """Build a list of nic information tuples.
 817
 818   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
 819   value in LUQueryInstanceData.
 820
 821   @type lu:  L{LogicalUnit}
 822   @param lu: the logical unit on whose behalf we execute
 823   @type nics: list of L{objects.NIC}
 824   @param nics: list of nics to convert to hooks tuples
 825
 826   """
 827   hooks_nics = []
 828   cluster = lu.cfg.GetClusterInfo()
 829   for nic in nics:
 830     ip = nic.ip
 831     mac = nic.mac
 832     filled_params = cluster.SimpleFillNIC(nic.nicparams)
 833     mode = filled_params[constants.NIC_MODE]
 834     link = filled_params[constants.NIC_LINK]
 835     hooks_nics.append((ip, mac, mode, link))
 836   return hooks_nics
 837
 838
 839 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
 840   """Builds instance related env variables for hooks from an object.
 841
 842   @type lu: L{LogicalUnit}
 843   @param lu: the logical unit on whose behalf we execute
 844   @type instance: L{objects.Instance}
 845   @param instance: the instance for which we should build the
 846       environment
 847   @type override: dict
 848   @param override: dictionary with key/values that will override
 849       our values
 850   @rtype: dict
 851   @return: the hook environment dictionary
 852
 853   """
 854   cluster = lu.cfg.GetClusterInfo()
 855   bep = cluster.FillBE(instance)
 856   hvp = cluster.FillHV(instance)
 857   args = {
 858     'name': instance.name,
 859     'primary_node': instance.primary_node,
 860     'secondary_nodes': instance.secondary_nodes,
 861     'os_type': instance.os,
 862     'status': instance.admin_up,
 863     'memory': bep[constants.BE_MEMORY],
 864     'vcpus': bep[constants.BE_VCPUS],
 865     'nics': _NICListToTuple(lu, instance.nics),
 866     'disk_template': instance.disk_template,
 867     'disks': [(disk.size, disk.mode) for disk in instance.disks],
 868     'bep': bep,
 869     'hvp': hvp,
 870     'hypervisor_name': instance.hypervisor,
 871   }
 872   if override:
 873     args.update(override)
 874   return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
 875
 876
 877 def _AdjustCandidatePool(lu, exceptions):
 878   """Adjust the candidate pool after node operations.
 879
 880   """
 881   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
 882   if mod_list:
 883     lu.LogInfo("Promoted nodes to master candidate role: %s",
 884                utils.CommaJoin(node.name for node in mod_list))
 885     for name in mod_list:
 886       lu.context.ReaddNode(name)
 887   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 888   if mc_now > mc_max:
 889     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
 890                (mc_now, mc_max))
 891
 892
 893 def _DecideSelfPromotion(lu, exceptions=None):
 894   """Decide whether I should promote myself as a master candidate.
 895
 896   """
 897   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
 898   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 899   # the new node will increase mc_max with one, so:
 900   mc_should = min(mc_should + 1, cp_size)
 901   return mc_now < mc_should
 902
 903
 904 def _CheckNicsBridgesExist(lu, target_nics, target_node):
 905   """Check that the brigdes needed by a list of nics exist.
 906
 907   """
 908   cluster = lu.cfg.GetClusterInfo()
 909   paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
 910   brlist = [params[constants.NIC_LINK] for params in paramslist
 911             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
 912   if brlist:
 913     result = lu.rpc.call_bridges_exist(target_node, brlist)
 914     result.Raise("Error checking bridges on destination node '%s'" %
 915                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
 916
 917
 918 def _CheckInstanceBridgesExist(lu, instance, node=None):
 919   """Check that the brigdes needed by an instance exist.
 920
 921   """
 922   if node is None:
 923     node = instance.primary_node
 924   _CheckNicsBridgesExist(lu, instance.nics, node)
 925
 926
 927 def _CheckOSVariant(os_obj, name):
 928   """Check whether an OS name conforms to the os variants specification.
 929
 930   @type os_obj: L{objects.OS}
 931   @param os_obj: OS object to check
 932   @type name: string
 933   @param name: OS name passed by the user, to check for validity
 934
 935   """
 936   if not os_obj.supported_variants:
 937     return
 938   variant = objects.OS.GetVariant(name)
 939   if not variant:
 940     raise errors.OpPrereqError("OS name must include a variant",
 941                                errors.ECODE_INVAL)
 942
 943   if variant not in os_obj.supported_variants:
 944     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
 945
 946
 947 def _GetNodeInstancesInner(cfg, fn):
 948   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
 949
 950
 951 def _GetNodeInstances(cfg, node_name):
 952   """Returns a list of all primary and secondary instances on a node.
 953
 954   """
 955
 956   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
 957
 958
 959 def _GetNodePrimaryInstances(cfg, node_name):
 960   """Returns primary instances on a node.
 961
 962   """
 963   return _GetNodeInstancesInner(cfg,
 964                                 lambda inst: node_name == inst.primary_node)
 965
 966
 967 def _GetNodeSecondaryInstances(cfg, node_name):
 968   """Returns secondary instances on a node.
 969
 970   """
 971   return _GetNodeInstancesInner(cfg,
 972                                 lambda inst: node_name in inst.secondary_nodes)
 973
 974
 975 def _GetStorageTypeArgs(cfg, storage_type):
 976   """Returns the arguments for a storage type.
 977
 978   """
 979   # Special case for file storage
 980   if storage_type == constants.ST_FILE:
 981     # storage.FileStorage wants a list of storage directories
 982     return [[cfg.GetFileStorageDir()]]
 983
 984   return []
 985
 986
 987 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
 988   faulty = []
 989
 990   for dev in instance.disks:
 991     cfg.SetDiskID(dev, node_name)
 992
 993   result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
 994   result.Raise("Failed to get disk status from node %s" % node_name,
 995                prereq=prereq, ecode=errors.ECODE_ENVIRON)
 996
 997   for idx, bdev_status in enumerate(result.payload):
 998     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
 999       faulty.append(idx)
1000
1001   return faulty
1002
1003
1004 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1005   """Check the sanity of iallocator and node arguments and use the
1006   cluster-wide iallocator if appropriate.
1007
1008   Check that at most one of (iallocator, node) is specified. If none is
1009   specified, then the LU's opcode's iallocator slot is filled with the
1010   cluster-wide default iallocator.
1011
1012   @type iallocator_slot: string
1013   @param iallocator_slot: the name of the opcode iallocator slot
1014   @type node_slot: string
1015   @param node_slot: the name of the opcode target node slot
1016
1017   """
1018   node = getattr(lu.op, node_slot, None)
1019   iallocator = getattr(lu.op, iallocator_slot, None)
1020
1021   if node is not None and iallocator is not None:
1022     raise errors.OpPrereqError("Do not specify both, iallocator and node.",
1023                                errors.ECODE_INVAL)
1024   elif node is None and iallocator is None:
1025     default_iallocator = lu.cfg.GetDefaultIAllocator()
1026     if default_iallocator:
1027       setattr(lu.op, iallocator_slot, default_iallocator)
1028     else:
1029       raise errors.OpPrereqError("No iallocator or node given and no"
1030                                  " cluster-wide default iallocator found."
1031                                  " Please specify either an iallocator or a"
1032                                  " node, or set a cluster-wide default"
1033                                  " iallocator.")
1034
1035
1036 class LUPostInitCluster(LogicalUnit):
1037   """Logical unit for running hooks after cluster initialization.
1038
1039   """
1040   HPATH = "cluster-init"
1041   HTYPE = constants.HTYPE_CLUSTER
1042
1043   def BuildHooksEnv(self):
1044     """Build hooks env.
1045
1046     """
1047     env = {"OP_TARGET": self.cfg.GetClusterName()}
1048     mn = self.cfg.GetMasterNode()
1049     return env, [], [mn]
1050
1051   def Exec(self, feedback_fn):
1052     """Nothing to do.
1053
1054     """
1055     return True
1056
1057
1058 class LUDestroyCluster(LogicalUnit):
1059   """Logical unit for destroying the cluster.
1060
1061   """
1062   HPATH = "cluster-destroy"
1063   HTYPE = constants.HTYPE_CLUSTER
1064
1065   def BuildHooksEnv(self):
1066     """Build hooks env.
1067
1068     """
1069     env = {"OP_TARGET": self.cfg.GetClusterName()}
1070     return env, [], []
1071
1072   def CheckPrereq(self):
1073     """Check prerequisites.
1074
1075     This checks whether the cluster is empty.
1076
1077     Any errors are signaled by raising errors.OpPrereqError.
1078
1079     """
1080     master = self.cfg.GetMasterNode()
1081
1082     nodelist = self.cfg.GetNodeList()
1083     if len(nodelist) != 1 or nodelist[0] != master:
1084       raise errors.OpPrereqError("There are still %d node(s) in"
1085                                  " this cluster." % (len(nodelist) - 1),
1086                                  errors.ECODE_INVAL)
1087     instancelist = self.cfg.GetInstanceList()
1088     if instancelist:
1089       raise errors.OpPrereqError("There are still %d instance(s) in"
1090                                  " this cluster." % len(instancelist),
1091                                  errors.ECODE_INVAL)
1092
1093   def Exec(self, feedback_fn):
1094     """Destroys the cluster.
1095
1096     """
1097     master = self.cfg.GetMasterNode()
1098
1099     # Run post hooks on master node before it's removed
1100     hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1101     try:
1102       hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1103     except:
1104       # pylint: disable-msg=W0702
1105       self.LogWarning("Errors occurred running hooks on %s" % master)
1106
1107     result = self.rpc.call_node_stop_master(master, False)
1108     result.Raise("Could not disable the master role")
1109
1110     return master
1111
1112
1113 def _VerifyCertificate(filename):
1114   """Verifies a certificate for LUVerifyCluster.
1115
1116   @type filename: string
1117   @param filename: Path to PEM file
1118
1119   """
1120   try:
1121     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1122                                            utils.ReadFile(filename))
1123   except Exception, err: # pylint: disable-msg=W0703
1124     return (LUVerifyCluster.ETYPE_ERROR,
1125             "Failed to load X509 certificate %s: %s" % (filename, err))
1126
1127   (errcode, msg) = \
1128     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1129                                 constants.SSL_CERT_EXPIRATION_ERROR)
1130
1131   if msg:
1132     fnamemsg = "While verifying %s: %s" % (filename, msg)
1133   else:
1134     fnamemsg = None
1135
1136   if errcode is None:
1137     return (None, fnamemsg)
1138   elif errcode == utils.CERT_WARNING:
1139     return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1140   elif errcode == utils.CERT_ERROR:
1141     return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1142
1143   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1144
1145
1146 class LUVerifyCluster(LogicalUnit):
1147   """Verifies the cluster status.
1148
1149   """
1150   HPATH = "cluster-verify"
1151   HTYPE = constants.HTYPE_CLUSTER
1152   _OP_PARAMS = [
1153     ("skip_checks", ht.EmptyList,
1154      ht.TListOf(ht.TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
1155     ("verbose", False, ht.TBool),
1156     ("error_codes", False, ht.TBool),
1157     ("debug_simulate_errors", False, ht.TBool),
1158     ]
1159   REQ_BGL = False
1160
1161   TCLUSTER = "cluster"
1162   TNODE = "node"
1163   TINSTANCE = "instance"
1164
1165   ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1166   ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1167   EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1168   EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1169   EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1170   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1171   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1172   EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1173   ENODEDRBD = (TNODE, "ENODEDRBD")
1174   ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1175   ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1176   ENODEHOOKS = (TNODE, "ENODEHOOKS")
1177   ENODEHV = (TNODE, "ENODEHV")
1178   ENODELVM = (TNODE, "ENODELVM")
1179   ENODEN1 = (TNODE, "ENODEN1")
1180   ENODENET = (TNODE, "ENODENET")
1181   ENODEOS = (TNODE, "ENODEOS")
1182   ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1183   ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1184   ENODERPC = (TNODE, "ENODERPC")
1185   ENODESSH = (TNODE, "ENODESSH")
1186   ENODEVERSION = (TNODE, "ENODEVERSION")
1187   ENODESETUP = (TNODE, "ENODESETUP")
1188   ENODETIME = (TNODE, "ENODETIME")
1189
1190   ETYPE_FIELD = "code"
1191   ETYPE_ERROR = "ERROR"
1192   ETYPE_WARNING = "WARNING"
1193
1194   class NodeImage(object):
1195     """A class representing the logical and physical status of a node.
1196
1197     @type name: string
1198     @ivar name: the node name to which this object refers
1199     @ivar volumes: a structure as returned from
1200         L{ganeti.backend.GetVolumeList} (runtime)
1201     @ivar instances: a list of running instances (runtime)
1202     @ivar pinst: list of configured primary instances (config)
1203     @ivar sinst: list of configured secondary instances (config)
1204     @ivar sbp: diction of {secondary-node: list of instances} of all peers
1205         of this node (config)
1206     @ivar mfree: free memory, as reported by hypervisor (runtime)
1207     @ivar dfree: free disk, as reported by the node (runtime)
1208     @ivar offline: the offline status (config)
1209     @type rpc_fail: boolean
1210     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1211         not whether the individual keys were correct) (runtime)
1212     @type lvm_fail: boolean
1213     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1214     @type hyp_fail: boolean
1215     @ivar hyp_fail: whether the RPC call didn't return the instance list
1216     @type ghost: boolean
1217     @ivar ghost: whether this is a known node or not (config)
1218     @type os_fail: boolean
1219     @ivar os_fail: whether the RPC call didn't return valid OS data
1220     @type oslist: list
1221     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1222
1223     """
1224     def __init__(self, offline=False, name=None):
1225       self.name = name
1226       self.volumes = {}
1227       self.instances = []
1228       self.pinst = []
1229       self.sinst = []
1230       self.sbp = {}
1231       self.mfree = 0
1232       self.dfree = 0
1233       self.offline = offline
1234       self.rpc_fail = False
1235       self.lvm_fail = False
1236       self.hyp_fail = False
1237       self.ghost = False
1238       self.os_fail = False
1239       self.oslist = {}
1240
1241   def ExpandNames(self):
1242     self.needed_locks = {
1243       locking.LEVEL_NODE: locking.ALL_SET,
1244       locking.LEVEL_INSTANCE: locking.ALL_SET,
1245     }
1246     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1247
1248   def _Error(self, ecode, item, msg, *args, **kwargs):
1249     """Format an error message.
1250
1251     Based on the opcode's error_codes parameter, either format a
1252     parseable error code, or a simpler error string.
1253
1254     This must be called only from Exec and functions called from Exec.
1255
1256     """
1257     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1258     itype, etxt = ecode
1259     # first complete the msg
1260     if args:
1261       msg = msg % args
1262     # then format the whole message
1263     if self.op.error_codes:
1264       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1265     else:
1266       if item:
1267         item = " " + item
1268       else:
1269         item = ""
1270       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1271     # and finally report it via the feedback_fn
1272     self._feedback_fn("  - %s" % msg)
1273
1274   def _ErrorIf(self, cond, *args, **kwargs):
1275     """Log an error message if the passed condition is True.
1276
1277     """
1278     cond = bool(cond) or self.op.debug_simulate_errors
1279     if cond:
1280       self._Error(*args, **kwargs)
1281     # do not mark the operation as failed for WARN cases only
1282     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1283       self.bad = self.bad or cond
1284
1285   def _VerifyNode(self, ninfo, nresult):
1286     """Perform some basic validation on data returned from a node.
1287
1288       - check the result data structure is well formed and has all the
1289         mandatory fields
1290       - check ganeti version
1291
1292     @type ninfo: L{objects.Node}
1293     @param ninfo: the node to check
1294     @param nresult: the results from the node
1295     @rtype: boolean
1296     @return: whether overall this call was successful (and we can expect
1297          reasonable values in the respose)
1298
1299     """
1300     node = ninfo.name
1301     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1302
1303     # main result, nresult should be a non-empty dict
1304     test = not nresult or not isinstance(nresult, dict)
1305     _ErrorIf(test, self.ENODERPC, node,
1306                   "unable to verify node: no data returned")
1307     if test:
1308       return False
1309
1310     # compares ganeti version
1311     local_version = constants.PROTOCOL_VERSION
1312     remote_version = nresult.get("version", None)
1313     test = not (remote_version and
1314                 isinstance(remote_version, (list, tuple)) and
1315                 len(remote_version) == 2)
1316     _ErrorIf(test, self.ENODERPC, node,
1317              "connection to node returned invalid data")
1318     if test:
1319       return False
1320
1321     test = local_version != remote_version[0]
1322     _ErrorIf(test, self.ENODEVERSION, node,
1323              "incompatible protocol versions: master %s,"
1324              " node %s", local_version, remote_version[0])
1325     if test:
1326       return False
1327
1328     # node seems compatible, we can actually try to look into its results
1329
1330     # full package version
1331     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1332                   self.ENODEVERSION, node,
1333                   "software version mismatch: master %s, node %s",
1334                   constants.RELEASE_VERSION, remote_version[1],
1335                   code=self.ETYPE_WARNING)
1336
1337     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1338     if isinstance(hyp_result, dict):
1339       for hv_name, hv_result in hyp_result.iteritems():
1340         test = hv_result is not None
1341         _ErrorIf(test, self.ENODEHV, node,
1342                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1343
1344
1345     test = nresult.get(constants.NV_NODESETUP,
1346                            ["Missing NODESETUP results"])
1347     _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1348              "; ".join(test))
1349
1350     return True
1351
1352   def _VerifyNodeTime(self, ninfo, nresult,
1353                       nvinfo_starttime, nvinfo_endtime):
1354     """Check the node time.
1355
1356     @type ninfo: L{objects.Node}
1357     @param ninfo: the node to check
1358     @param nresult: the remote results for the node
1359     @param nvinfo_starttime: the start time of the RPC call
1360     @param nvinfo_endtime: the end time of the RPC call
1361
1362     """
1363     node = ninfo.name
1364     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1365
1366     ntime = nresult.get(constants.NV_TIME, None)
1367     try:
1368       ntime_merged = utils.MergeTime(ntime)
1369     except (ValueError, TypeError):
1370       _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1371       return
1372
1373     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1374       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1375     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1376       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1377     else:
1378       ntime_diff = None
1379
1380     _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1381              "Node time diverges by at least %s from master node time",
1382              ntime_diff)
1383
1384   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1385     """Check the node time.
1386
1387     @type ninfo: L{objects.Node}
1388     @param ninfo: the node to check
1389     @param nresult: the remote results for the node
1390     @param vg_name: the configured VG name
1391
1392     """
1393     if vg_name is None:
1394       return
1395
1396     node = ninfo.name
1397     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1398
1399     # checks vg existence and size > 20G
1400     vglist = nresult.get(constants.NV_VGLIST, None)
1401     test = not vglist
1402     _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1403     if not test:
1404       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1405                                             constants.MIN_VG_SIZE)
1406       _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1407
1408     # check pv names
1409     pvlist = nresult.get(constants.NV_PVLIST, None)
1410     test = pvlist is None
1411     _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1412     if not test:
1413       # check that ':' is not present in PV names, since it's a
1414       # special character for lvcreate (denotes the range of PEs to
1415       # use on the PV)
1416       for _, pvname, owner_vg in pvlist:
1417         test = ":" in pvname
1418         _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1419                  " '%s' of VG '%s'", pvname, owner_vg)
1420
1421   def _VerifyNodeNetwork(self, ninfo, nresult):
1422     """Check the node time.
1423
1424     @type ninfo: L{objects.Node}
1425     @param ninfo: the node to check
1426     @param nresult: the remote results for the node
1427
1428     """
1429     node = ninfo.name
1430     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1431
1432     test = constants.NV_NODELIST not in nresult
1433     _ErrorIf(test, self.ENODESSH, node,
1434              "node hasn't returned node ssh connectivity data")
1435     if not test:
1436       if nresult[constants.NV_NODELIST]:
1437         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1438           _ErrorIf(True, self.ENODESSH, node,
1439                    "ssh communication with node '%s': %s", a_node, a_msg)
1440
1441     test = constants.NV_NODENETTEST not in nresult
1442     _ErrorIf(test, self.ENODENET, node,
1443              "node hasn't returned node tcp connectivity data")
1444     if not test:
1445       if nresult[constants.NV_NODENETTEST]:
1446         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1447         for anode in nlist:
1448           _ErrorIf(True, self.ENODENET, node,
1449                    "tcp communication with node '%s': %s",
1450                    anode, nresult[constants.NV_NODENETTEST][anode])
1451
1452     test = constants.NV_MASTERIP not in nresult
1453     _ErrorIf(test, self.ENODENET, node,
1454              "node hasn't returned node master IP reachability data")
1455     if not test:
1456       if not nresult[constants.NV_MASTERIP]:
1457         if node == self.master_node:
1458           msg = "the master node cannot reach the master IP (not configured?)"
1459         else:
1460           msg = "cannot reach the master IP"
1461         _ErrorIf(True, self.ENODENET, node, msg)
1462
1463
1464   def _VerifyInstance(self, instance, instanceconfig, node_image):
1465     """Verify an instance.
1466
1467     This function checks to see if the required block devices are
1468     available on the instance's node.
1469
1470     """
1471     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1472     node_current = instanceconfig.primary_node
1473
1474     node_vol_should = {}
1475     instanceconfig.MapLVsByNode(node_vol_should)
1476
1477     for node in node_vol_should:
1478       n_img = node_image[node]
1479       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1480         # ignore missing volumes on offline or broken nodes
1481         continue
1482       for volume in node_vol_should[node]:
1483         test = volume not in n_img.volumes
1484         _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1485                  "volume %s missing on node %s", volume, node)
1486
1487     if instanceconfig.admin_up:
1488       pri_img = node_image[node_current]
1489       test = instance not in pri_img.instances and not pri_img.offline
1490       _ErrorIf(test, self.EINSTANCEDOWN, instance,
1491                "instance not running on its primary node %s",
1492                node_current)
1493
1494     for node, n_img in node_image.items():
1495       if (not node == node_current):
1496         test = instance in n_img.instances
1497         _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1498                  "instance should not run on node %s", node)
1499
1500   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1501     """Verify if there are any unknown volumes in the cluster.
1502
1503     The .os, .swap and backup volumes are ignored. All other volumes are
1504     reported as unknown.
1505
1506     @type reserved: L{ganeti.utils.FieldSet}
1507     @param reserved: a FieldSet of reserved volume names
1508
1509     """
1510     for node, n_img in node_image.items():
1511       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1512         # skip non-healthy nodes
1513         continue
1514       for volume in n_img.volumes:
1515         test = ((node not in node_vol_should or
1516                 volume not in node_vol_should[node]) and
1517                 not reserved.Matches(volume))
1518         self._ErrorIf(test, self.ENODEORPHANLV, node,
1519                       "volume %s is unknown", volume)
1520
1521   def _VerifyOrphanInstances(self, instancelist, node_image):
1522     """Verify the list of running instances.
1523
1524     This checks what instances are running but unknown to the cluster.
1525
1526     """
1527     for node, n_img in node_image.items():
1528       for o_inst in n_img.instances:
1529         test = o_inst not in instancelist
1530         self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1531                       "instance %s on node %s should not exist", o_inst, node)
1532
1533   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1534     """Verify N+1 Memory Resilience.
1535
1536     Check that if one single node dies we can still start all the
1537     instances it was primary for.
1538
1539     """
1540     for node, n_img in node_image.items():
1541       # This code checks that every node which is now listed as
1542       # secondary has enough memory to host all instances it is
1543       # supposed to should a single other node in the cluster fail.
1544       # FIXME: not ready for failover to an arbitrary node
1545       # FIXME: does not support file-backed instances
1546       # WARNING: we currently take into account down instances as well
1547       # as up ones, considering that even if they're down someone
1548       # might want to start them even in the event of a node failure.
1549       for prinode, instances in n_img.sbp.items():
1550         needed_mem = 0
1551         for instance in instances:
1552           bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1553           if bep[constants.BE_AUTO_BALANCE]:
1554             needed_mem += bep[constants.BE_MEMORY]
1555         test = n_img.mfree < needed_mem
1556         self._ErrorIf(test, self.ENODEN1, node,
1557                       "not enough memory on to accommodate"
1558                       " failovers should peer node %s fail", prinode)
1559
1560   def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1561                        master_files):
1562     """Verifies and computes the node required file checksums.
1563
1564     @type ninfo: L{objects.Node}
1565     @param ninfo: the node to check
1566     @param nresult: the remote results for the node
1567     @param file_list: required list of files
1568     @param local_cksum: dictionary of local files and their checksums
1569     @param master_files: list of files that only masters should have
1570
1571     """
1572     node = ninfo.name
1573     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1574
1575     remote_cksum = nresult.get(constants.NV_FILELIST, None)
1576     test = not isinstance(remote_cksum, dict)
1577     _ErrorIf(test, self.ENODEFILECHECK, node,
1578              "node hasn't returned file checksum data")
1579     if test:
1580       return
1581
1582     for file_name in file_list:
1583       node_is_mc = ninfo.master_candidate
1584       must_have = (file_name not in master_files) or node_is_mc
1585       # missing
1586       test1 = file_name not in remote_cksum
1587       # invalid checksum
1588       test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1589       # existing and good
1590       test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1591       _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1592                "file '%s' missing", file_name)
1593       _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1594                "file '%s' has wrong checksum", file_name)
1595       # not candidate and this is not a must-have file
1596       _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1597                "file '%s' should not exist on non master"
1598                " candidates (and the file is outdated)", file_name)
1599       # all good, except non-master/non-must have combination
1600       _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1601                "file '%s' should not exist"
1602                " on non master candidates", file_name)
1603
1604   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1605                       drbd_map):
1606     """Verifies and the node DRBD status.
1607
1608     @type ninfo: L{objects.Node}
1609     @param ninfo: the node to check
1610     @param nresult: the remote results for the node
1611     @param instanceinfo: the dict of instances
1612     @param drbd_helper: the configured DRBD usermode helper
1613     @param drbd_map: the DRBD map as returned by
1614         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1615
1616     """
1617     node = ninfo.name
1618     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1619
1620     if drbd_helper:
1621       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1622       test = (helper_result == None)
1623       _ErrorIf(test, self.ENODEDRBDHELPER, node,
1624                "no drbd usermode helper returned")
1625       if helper_result:
1626         status, payload = helper_result
1627         test = not status
1628         _ErrorIf(test, self.ENODEDRBDHELPER, node,
1629                  "drbd usermode helper check unsuccessful: %s", payload)
1630         test = status and (payload != drbd_helper)
1631         _ErrorIf(test, self.ENODEDRBDHELPER, node,
1632                  "wrong drbd usermode helper: %s", payload)
1633
1634     # compute the DRBD minors
1635     node_drbd = {}
1636     for minor, instance in drbd_map[node].items():
1637       test = instance not in instanceinfo
1638       _ErrorIf(test, self.ECLUSTERCFG, None,
1639                "ghost instance '%s' in temporary DRBD map", instance)
1640         # ghost instance should not be running, but otherwise we
1641         # don't give double warnings (both ghost instance and
1642         # unallocated minor in use)
1643       if test:
1644         node_drbd[minor] = (instance, False)
1645       else:
1646         instance = instanceinfo[instance]
1647         node_drbd[minor] = (instance.name, instance.admin_up)
1648
1649     # and now check them
1650     used_minors = nresult.get(constants.NV_DRBDLIST, [])
1651     test = not isinstance(used_minors, (tuple, list))
1652     _ErrorIf(test, self.ENODEDRBD, node,
1653              "cannot parse drbd status file: %s", str(used_minors))
1654     if test:
1655       # we cannot check drbd status
1656       return
1657
1658     for minor, (iname, must_exist) in node_drbd.items():
1659       test = minor not in used_minors and must_exist
1660       _ErrorIf(test, self.ENODEDRBD, node,
1661                "drbd minor %d of instance %s is not active", minor, iname)
1662     for minor in used_minors:
1663       test = minor not in node_drbd
1664       _ErrorIf(test, self.ENODEDRBD, node,
1665                "unallocated drbd minor %d is in use", minor)
1666
1667   def _UpdateNodeOS(self, ninfo, nresult, nimg):
1668     """Builds the node OS structures.
1669
1670     @type ninfo: L{objects.Node}
1671     @param ninfo: the node to check
1672     @param nresult: the remote results for the node
1673     @param nimg: the node image object
1674
1675     """
1676     node = ninfo.name
1677     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1678
1679     remote_os = nresult.get(constants.NV_OSLIST, None)
1680     test = (not isinstance(remote_os, list) or
1681             not compat.all(isinstance(v, list) and len(v) == 7
1682                            for v in remote_os))
1683
1684     _ErrorIf(test, self.ENODEOS, node,
1685              "node hasn't returned valid OS data")
1686
1687     nimg.os_fail = test
1688
1689     if test:
1690       return
1691
1692     os_dict = {}
1693
1694     for (name, os_path, status, diagnose,
1695          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1696
1697       if name not in os_dict:
1698         os_dict[name] = []
1699
1700       # parameters is a list of lists instead of list of tuples due to
1701       # JSON lacking a real tuple type, fix it:
1702       parameters = [tuple(v) for v in parameters]
1703       os_dict[name].append((os_path, status, diagnose,
1704                             set(variants), set(parameters), set(api_ver)))
1705
1706     nimg.oslist = os_dict
1707
1708   def _VerifyNodeOS(self, ninfo, nimg, base):
1709     """Verifies the node OS list.
1710
1711     @type ninfo: L{objects.Node}
1712     @param ninfo: the node to check
1713     @param nimg: the node image object
1714     @param base: the 'template' node we match against (e.g. from the master)
1715
1716     """
1717     node = ninfo.name
1718     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1719
1720     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1721
1722     for os_name, os_data in nimg.oslist.items():
1723       assert os_data, "Empty OS status for OS %s?!" % os_name
1724       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1725       _ErrorIf(not f_status, self.ENODEOS, node,
1726                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1727       _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1728                "OS '%s' has multiple entries (first one shadows the rest): %s",
1729                os_name, utils.CommaJoin([v[0] for v in os_data]))
1730       # this will catched in backend too
1731       _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1732                and not f_var, self.ENODEOS, node,
1733                "OS %s with API at least %d does not declare any variant",
1734                os_name, constants.OS_API_V15)
1735       # comparisons with the 'base' image
1736       test = os_name not in base.oslist
1737       _ErrorIf(test, self.ENODEOS, node,
1738                "Extra OS %s not present on reference node (%s)",
1739                os_name, base.name)
1740       if test:
1741         continue
1742       assert base.oslist[os_name], "Base node has empty OS status?"
1743       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1744       if not b_status:
1745         # base OS is invalid, skipping
1746         continue
1747       for kind, a, b in [("API version", f_api, b_api),
1748                          ("variants list", f_var, b_var),
1749                          ("parameters", f_param, b_param)]:
1750         _ErrorIf(a != b, self.ENODEOS, node,
1751                  "OS %s %s differs from reference node %s: %s vs. %s",
1752                  kind, os_name, base.name,
1753                  utils.CommaJoin(a), utils.CommaJoin(b))
1754
1755     # check any missing OSes
1756     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1757     _ErrorIf(missing, self.ENODEOS, node,
1758              "OSes present on reference node %s but missing on this node: %s",
1759              base.name, utils.CommaJoin(missing))
1760
1761   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1762     """Verifies and updates the node volume data.
1763
1764     This function will update a L{NodeImage}'s internal structures
1765     with data from the remote call.
1766
1767     @type ninfo: L{objects.Node}
1768     @param ninfo: the node to check
1769     @param nresult: the remote results for the node
1770     @param nimg: the node image object
1771     @param vg_name: the configured VG name
1772
1773     """
1774     node = ninfo.name
1775     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1776
1777     nimg.lvm_fail = True
1778     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1779     if vg_name is None:
1780       pass
1781     elif isinstance(lvdata, basestring):
1782       _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1783                utils.SafeEncode(lvdata))
1784     elif not isinstance(lvdata, dict):
1785       _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1786     else:
1787       nimg.volumes = lvdata
1788       nimg.lvm_fail = False
1789
1790   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1791     """Verifies and updates the node instance list.
1792
1793     If the listing was successful, then updates this node's instance
1794     list. Otherwise, it marks the RPC call as failed for the instance
1795     list key.
1796
1797     @type ninfo: L{objects.Node}
1798     @param ninfo: the node to check
1799     @param nresult: the remote results for the node
1800     @param nimg: the node image object
1801
1802     """
1803     idata = nresult.get(constants.NV_INSTANCELIST, None)
1804     test = not isinstance(idata, list)
1805     self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1806                   " (instancelist): %s", utils.SafeEncode(str(idata)))
1807     if test:
1808       nimg.hyp_fail = True
1809     else:
1810       nimg.instances = idata
1811
1812   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1813     """Verifies and computes a node information map
1814
1815     @type ninfo: L{objects.Node}
1816     @param ninfo: the node to check
1817     @param nresult: the remote results for the node
1818     @param nimg: the node image object
1819     @param vg_name: the configured VG name
1820
1821     """
1822     node = ninfo.name
1823     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1824
1825     # try to read free memory (from the hypervisor)
1826     hv_info = nresult.get(constants.NV_HVINFO, None)
1827     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1828     _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1829     if not test:
1830       try:
1831         nimg.mfree = int(hv_info["memory_free"])
1832       except (ValueError, TypeError):
1833         _ErrorIf(True, self.ENODERPC, node,
1834                  "node returned invalid nodeinfo, check hypervisor")
1835
1836     # FIXME: devise a free space model for file based instances as well
1837     if vg_name is not None:
1838       test = (constants.NV_VGLIST not in nresult or
1839               vg_name not in nresult[constants.NV_VGLIST])
1840       _ErrorIf(test, self.ENODELVM, node,
1841                "node didn't return data for the volume group '%s'"
1842                " - it is either missing or broken", vg_name)
1843       if not test:
1844         try:
1845           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1846         except (ValueError, TypeError):
1847           _ErrorIf(True, self.ENODERPC, node,
1848                    "node returned invalid LVM info, check LVM status")
1849
1850   def BuildHooksEnv(self):
1851     """Build hooks env.
1852
1853     Cluster-Verify hooks just ran in the post phase and their failure makes
1854     the output be logged in the verify output and the verification to fail.
1855
1856     """
1857     all_nodes = self.cfg.GetNodeList()
1858     env = {
1859       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1860       }
1861     for node in self.cfg.GetAllNodesInfo().values():
1862       env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1863
1864     return env, [], all_nodes
1865
1866   def Exec(self, feedback_fn):
1867     """Verify integrity of cluster, performing various test on nodes.
1868
1869     """
1870     self.bad = False
1871     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1872     verbose = self.op.verbose
1873     self._feedback_fn = feedback_fn
1874     feedback_fn("* Verifying global settings")
1875     for msg in self.cfg.VerifyConfig():
1876       _ErrorIf(True, self.ECLUSTERCFG, None, msg)
1877
1878     # Check the cluster certificates
1879     for cert_filename in constants.ALL_CERT_FILES:
1880       (errcode, msg) = _VerifyCertificate(cert_filename)
1881       _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1882
1883     vg_name = self.cfg.GetVGName()
1884     drbd_helper = self.cfg.GetDRBDHelper()
1885     hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1886     cluster = self.cfg.GetClusterInfo()
1887     nodelist = utils.NiceSort(self.cfg.GetNodeList())
1888     nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1889     instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1890     instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1891                         for iname in instancelist)
1892     i_non_redundant = [] # Non redundant instances
1893     i_non_a_balanced = [] # Non auto-balanced instances
1894     n_offline = 0 # Count of offline nodes
1895     n_drained = 0 # Count of nodes being drained
1896     node_vol_should = {}
1897
1898     # FIXME: verify OS list
1899     # do local checksums
1900     master_files = [constants.CLUSTER_CONF_FILE]
1901     master_node = self.master_node = self.cfg.GetMasterNode()
1902     master_ip = self.cfg.GetMasterIP()
1903
1904     file_names = ssconf.SimpleStore().GetFileList()
1905     file_names.extend(constants.ALL_CERT_FILES)
1906     file_names.extend(master_files)
1907     if cluster.modify_etc_hosts:
1908       file_names.append(constants.ETC_HOSTS)
1909
1910     local_checksums = utils.FingerprintFiles(file_names)
1911
1912     feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1913     node_verify_param = {
1914       constants.NV_FILELIST: file_names,
1915       constants.NV_NODELIST: [node.name for node in nodeinfo
1916                               if not node.offline],
1917       constants.NV_HYPERVISOR: hypervisors,
1918       constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1919                                   node.secondary_ip) for node in nodeinfo
1920                                  if not node.offline],
1921       constants.NV_INSTANCELIST: hypervisors,
1922       constants.NV_VERSION: None,
1923       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1924       constants.NV_NODESETUP: None,
1925       constants.NV_TIME: None,
1926       constants.NV_MASTERIP: (master_node, master_ip),
1927       constants.NV_OSLIST: None,
1928       }
1929
1930     if vg_name is not None:
1931       node_verify_param[constants.NV_VGLIST] = None
1932       node_verify_param[constants.NV_LVLIST] = vg_name
1933       node_verify_param[constants.NV_PVLIST] = [vg_name]
1934       node_verify_param[constants.NV_DRBDLIST] = None
1935
1936     if drbd_helper:
1937       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
1938
1939     # Build our expected cluster state
1940     node_image = dict((node.name, self.NodeImage(offline=node.offline,
1941                                                  name=node.name))
1942                       for node in nodeinfo)
1943
1944     for instance in instancelist:
1945       inst_config = instanceinfo[instance]
1946
1947       for nname in inst_config.all_nodes:
1948         if nname not in node_image:
1949           # ghost node
1950           gnode = self.NodeImage(name=nname)
1951           gnode.ghost = True
1952           node_image[nname] = gnode
1953
1954       inst_config.MapLVsByNode(node_vol_should)
1955
1956       pnode = inst_config.primary_node
1957       node_image[pnode].pinst.append(instance)
1958
1959       for snode in inst_config.secondary_nodes:
1960         nimg = node_image[snode]
1961         nimg.sinst.append(instance)
1962         if pnode not in nimg.sbp:
1963           nimg.sbp[pnode] = []
1964         nimg.sbp[pnode].append(instance)
1965
1966     # At this point, we have the in-memory data structures complete,
1967     # except for the runtime information, which we'll gather next
1968
1969     # Due to the way our RPC system works, exact response times cannot be
1970     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
1971     # time before and after executing the request, we can at least have a time
1972     # window.
1973     nvinfo_starttime = time.time()
1974     all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
1975                                            self.cfg.GetClusterName())
1976     nvinfo_endtime = time.time()
1977
1978     all_drbd_map = self.cfg.ComputeDRBDMap()
1979
1980     feedback_fn("* Verifying node status")
1981
1982     refos_img = None
1983
1984     for node_i in nodeinfo:
1985       node = node_i.name
1986       nimg = node_image[node]
1987
1988       if node_i.offline:
1989         if verbose:
1990           feedback_fn("* Skipping offline node %s" % (node,))
1991         n_offline += 1
1992         continue
1993
1994       if node == master_node:
1995         ntype = "master"
1996       elif node_i.master_candidate:
1997         ntype = "master candidate"
1998       elif node_i.drained:
1999         ntype = "drained"
2000         n_drained += 1
2001       else:
2002         ntype = "regular"
2003       if verbose:
2004         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2005
2006       msg = all_nvinfo[node].fail_msg
2007       _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2008       if msg:
2009         nimg.rpc_fail = True
2010         continue
2011
2012       nresult = all_nvinfo[node].payload
2013
2014       nimg.call_ok = self._VerifyNode(node_i, nresult)
2015       self._VerifyNodeNetwork(node_i, nresult)
2016       self._VerifyNodeLVM(node_i, nresult, vg_name)
2017       self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2018                             master_files)
2019       self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2020                            all_drbd_map)
2021       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2022
2023       self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2024       self._UpdateNodeInstances(node_i, nresult, nimg)
2025       self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2026       self._UpdateNodeOS(node_i, nresult, nimg)
2027       if not nimg.os_fail:
2028         if refos_img is None:
2029           refos_img = nimg
2030         self._VerifyNodeOS(node_i, nimg, refos_img)
2031
2032     feedback_fn("* Verifying instance status")
2033     for instance in instancelist:
2034       if verbose:
2035         feedback_fn("* Verifying instance %s" % instance)
2036       inst_config = instanceinfo[instance]
2037       self._VerifyInstance(instance, inst_config, node_image)
2038       inst_nodes_offline = []
2039
2040       pnode = inst_config.primary_node
2041       pnode_img = node_image[pnode]
2042       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2043                self.ENODERPC, pnode, "instance %s, connection to"
2044                " primary node failed", instance)
2045
2046       if pnode_img.offline:
2047         inst_nodes_offline.append(pnode)
2048
2049       # If the instance is non-redundant we cannot survive losing its primary
2050       # node, so we are not N+1 compliant. On the other hand we have no disk
2051       # templates with more than one secondary so that situation is not well
2052       # supported either.
2053       # FIXME: does not support file-backed instances
2054       if not inst_config.secondary_nodes:
2055         i_non_redundant.append(instance)
2056       _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2057                instance, "instance has multiple secondary nodes: %s",
2058                utils.CommaJoin(inst_config.secondary_nodes),
2059                code=self.ETYPE_WARNING)
2060
2061       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2062         i_non_a_balanced.append(instance)
2063
2064       for snode in inst_config.secondary_nodes:
2065         s_img = node_image[snode]
2066         _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2067                  "instance %s, connection to secondary node failed", instance)
2068
2069         if s_img.offline:
2070           inst_nodes_offline.append(snode)
2071
2072       # warn that the instance lives on offline nodes
2073       _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2074                "instance lives on offline node(s) %s",
2075                utils.CommaJoin(inst_nodes_offline))
2076       # ... or ghost nodes
2077       for node in inst_config.all_nodes:
2078         _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2079                  "instance lives on ghost node %s", node)
2080
2081     feedback_fn("* Verifying orphan volumes")
2082     reserved = utils.FieldSet(*cluster.reserved_lvs)
2083     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2084
2085     feedback_fn("* Verifying orphan instances")
2086     self._VerifyOrphanInstances(instancelist, node_image)
2087
2088     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2089       feedback_fn("* Verifying N+1 Memory redundancy")
2090       self._VerifyNPlusOneMemory(node_image, instanceinfo)
2091
2092     feedback_fn("* Other Notes")
2093     if i_non_redundant:
2094       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2095                   % len(i_non_redundant))
2096
2097     if i_non_a_balanced:
2098       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2099                   % len(i_non_a_balanced))
2100
2101     if n_offline:
2102       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2103
2104     if n_drained:
2105       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2106
2107     return not self.bad
2108
2109   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2110     """Analyze the post-hooks' result
2111
2112     This method analyses the hook result, handles it, and sends some
2113     nicely-formatted feedback back to the user.
2114
2115     @param phase: one of L{constants.HOOKS_PHASE_POST} or
2116         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2117     @param hooks_results: the results of the multi-node hooks rpc call
2118     @param feedback_fn: function used send feedback back to the caller
2119     @param lu_result: previous Exec result
2120     @return: the new Exec result, based on the previous result
2121         and hook results
2122
2123     """
2124     # We only really run POST phase hooks, and are only interested in
2125     # their results
2126     if phase == constants.HOOKS_PHASE_POST:
2127       # Used to change hooks' output to proper indentation
2128       indent_re = re.compile('^', re.M)
2129       feedback_fn("* Hooks Results")
2130       assert hooks_results, "invalid result from hooks"
2131
2132       for node_name in hooks_results:
2133         res = hooks_results[node_name]
2134         msg = res.fail_msg
2135         test = msg and not res.offline
2136         self._ErrorIf(test, self.ENODEHOOKS, node_name,
2137                       "Communication failure in hooks execution: %s", msg)
2138         if res.offline or msg:
2139           # No need to investigate payload if node is offline or gave an error.
2140           # override manually lu_result here as _ErrorIf only
2141           # overrides self.bad
2142           lu_result = 1
2143           continue
2144         for script, hkr, output in res.payload:
2145           test = hkr == constants.HKR_FAIL
2146           self._ErrorIf(test, self.ENODEHOOKS, node_name,
2147                         "Script %s failed, output:", script)
2148           if test:
2149             output = indent_re.sub('      ', output)
2150             feedback_fn("%s" % output)
2151             lu_result = 0
2152
2153       return lu_result
2154
2155
2156 class LUVerifyDisks(NoHooksLU):
2157   """Verifies the cluster disks status.
2158
2159   """
2160   REQ_BGL = False
2161
2162   def ExpandNames(self):
2163     self.needed_locks = {
2164       locking.LEVEL_NODE: locking.ALL_SET,
2165       locking.LEVEL_INSTANCE: locking.ALL_SET,
2166     }
2167     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2168
2169   def Exec(self, feedback_fn):
2170     """Verify integrity of cluster disks.
2171
2172     @rtype: tuple of three items
2173     @return: a tuple of (dict of node-to-node_error, list of instances
2174         which need activate-disks, dict of instance: (node, volume) for
2175         missing volumes
2176
2177     """
2178     result = res_nodes, res_instances, res_missing = {}, [], {}
2179
2180     vg_name = self.cfg.GetVGName()
2181     nodes = utils.NiceSort(self.cfg.GetNodeList())
2182     instances = [self.cfg.GetInstanceInfo(name)
2183                  for name in self.cfg.GetInstanceList()]
2184
2185     nv_dict = {}
2186     for inst in instances:
2187       inst_lvs = {}
2188       if (not inst.admin_up or
2189           inst.disk_template not in constants.DTS_NET_MIRROR):
2190         continue
2191       inst.MapLVsByNode(inst_lvs)
2192       # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2193       for node, vol_list in inst_lvs.iteritems():
2194         for vol in vol_list:
2195           nv_dict[(node, vol)] = inst
2196
2197     if not nv_dict:
2198       return result
2199
2200     node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2201
2202     for node in nodes:
2203       # node_volume
2204       node_res = node_lvs[node]
2205       if node_res.offline:
2206         continue
2207       msg = node_res.fail_msg
2208       if msg:
2209         logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2210         res_nodes[node] = msg
2211         continue
2212
2213       lvs = node_res.payload
2214       for lv_name, (_, _, lv_online) in lvs.items():
2215         inst = nv_dict.pop((node, lv_name), None)
2216         if (not lv_online and inst is not None
2217             and inst.name not in res_instances):
2218           res_instances.append(inst.name)
2219
2220     # any leftover items in nv_dict are missing LVs, let's arrange the
2221     # data better
2222     for key, inst in nv_dict.iteritems():
2223       if inst.name not in res_missing:
2224         res_missing[inst.name] = []
2225       res_missing[inst.name].append(key)
2226
2227     return result
2228
2229
2230 class LURepairDiskSizes(NoHooksLU):
2231   """Verifies the cluster disks sizes.
2232
2233   """
2234   _OP_PARAMS = [("instances", ht.EmptyList, ht.TListOf(ht.TNonEmptyString))]
2235   REQ_BGL = False
2236
2237   def ExpandNames(self):
2238     if self.op.instances:
2239       self.wanted_names = []
2240       for name in self.op.instances:
2241         full_name = _ExpandInstanceName(self.cfg, name)
2242         self.wanted_names.append(full_name)
2243       self.needed_locks = {
2244         locking.LEVEL_NODE: [],
2245         locking.LEVEL_INSTANCE: self.wanted_names,
2246         }
2247       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2248     else:
2249       self.wanted_names = None
2250       self.needed_locks = {
2251         locking.LEVEL_NODE: locking.ALL_SET,
2252         locking.LEVEL_INSTANCE: locking.ALL_SET,
2253         }
2254     self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2255
2256   def DeclareLocks(self, level):
2257     if level == locking.LEVEL_NODE and self.wanted_names is not None:
2258       self._LockInstancesNodes(primary_only=True)
2259
2260   def CheckPrereq(self):
2261     """Check prerequisites.
2262
2263     This only checks the optional instance list against the existing names.
2264
2265     """
2266     if self.wanted_names is None:
2267       self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2268
2269     self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2270                              in self.wanted_names]
2271
2272   def _EnsureChildSizes(self, disk):
2273     """Ensure children of the disk have the needed disk size.
2274
2275     This is valid mainly for DRBD8 and fixes an issue where the
2276     children have smaller disk size.
2277
2278     @param disk: an L{ganeti.objects.Disk} object
2279
2280     """
2281     if disk.dev_type == constants.LD_DRBD8:
2282       assert disk.children, "Empty children for DRBD8?"
2283       fchild = disk.children[0]
2284       mismatch = fchild.size < disk.size
2285       if mismatch:
2286         self.LogInfo("Child disk has size %d, parent %d, fixing",
2287                      fchild.size, disk.size)
2288         fchild.size = disk.size
2289
2290       # and we recurse on this child only, not on the metadev
2291       return self._EnsureChildSizes(fchild) or mismatch
2292     else:
2293       return False
2294
2295   def Exec(self, feedback_fn):
2296     """Verify the size of cluster disks.
2297
2298     """
2299     # TODO: check child disks too
2300     # TODO: check differences in size between primary/secondary nodes
2301     per_node_disks = {}
2302     for instance in self.wanted_instances:
2303       pnode = instance.primary_node
2304       if pnode not in per_node_disks:
2305         per_node_disks[pnode] = []
2306       for idx, disk in enumerate(instance.disks):
2307         per_node_disks[pnode].append((instance, idx, disk))
2308
2309     changed = []
2310     for node, dskl in per_node_disks.items():
2311       newl = [v[2].Copy() for v in dskl]
2312       for dsk in newl:
2313         self.cfg.SetDiskID(dsk, node)
2314       result = self.rpc.call_blockdev_getsizes(node, newl)
2315       if result.fail_msg:
2316         self.LogWarning("Failure in blockdev_getsizes call to node"
2317                         " %s, ignoring", node)
2318         continue
2319       if len(result.data) != len(dskl):
2320         self.LogWarning("Invalid result from node %s, ignoring node results",
2321                         node)
2322         continue
2323       for ((instance, idx, disk), size) in zip(dskl, result.data):
2324         if size is None:
2325           self.LogWarning("Disk %d of instance %s did not return size"
2326                           " information, ignoring", idx, instance.name)
2327           continue
2328         if not isinstance(size, (int, long)):
2329           self.LogWarning("Disk %d of instance %s did not return valid"
2330                           " size information, ignoring", idx, instance.name)
2331           continue
2332         size = size >> 20
2333         if size != disk.size:
2334           self.LogInfo("Disk %d of instance %s has mismatched size,"
2335                        " correcting: recorded %d, actual %d", idx,
2336                        instance.name, disk.size, size)
2337           disk.size = size
2338           self.cfg.Update(instance, feedback_fn)
2339           changed.append((instance.name, idx, size))
2340         if self._EnsureChildSizes(disk):
2341           self.cfg.Update(instance, feedback_fn)
2342           changed.append((instance.name, idx, disk.size))
2343     return changed
2344
2345
2346 class LURenameCluster(LogicalUnit):
2347   """Rename the cluster.
2348
2349   """
2350   HPATH = "cluster-rename"
2351   HTYPE = constants.HTYPE_CLUSTER
2352   _OP_PARAMS = [("name", ht.NoDefault, ht.TNonEmptyString)]
2353
2354   def BuildHooksEnv(self):
2355     """Build hooks env.
2356
2357     """
2358     env = {
2359       "OP_TARGET": self.cfg.GetClusterName(),
2360       "NEW_NAME": self.op.name,
2361       }
2362     mn = self.cfg.GetMasterNode()
2363     all_nodes = self.cfg.GetNodeList()
2364     return env, [mn], all_nodes
2365
2366   def CheckPrereq(self):
2367     """Verify that the passed name is a valid one.
2368
2369     """
2370     hostname = netutils.GetHostname(name=self.op.name,
2371                                     family=self.cfg.GetPrimaryIPFamily())
2372
2373     new_name = hostname.name
2374     self.ip = new_ip = hostname.ip
2375     old_name = self.cfg.GetClusterName()
2376     old_ip = self.cfg.GetMasterIP()
2377     if new_name == old_name and new_ip == old_ip:
2378       raise errors.OpPrereqError("Neither the name nor the IP address of the"
2379                                  " cluster has changed",
2380                                  errors.ECODE_INVAL)
2381     if new_ip != old_ip:
2382       if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2383         raise errors.OpPrereqError("The given cluster IP address (%s) is"
2384                                    " reachable on the network" %
2385                                    new_ip, errors.ECODE_NOTUNIQUE)
2386
2387     self.op.name = new_name
2388
2389   def Exec(self, feedback_fn):
2390     """Rename the cluster.
2391
2392     """
2393     clustername = self.op.name
2394     ip = self.ip
2395
2396     # shutdown the master IP
2397     master = self.cfg.GetMasterNode()
2398     result = self.rpc.call_node_stop_master(master, False)
2399     result.Raise("Could not disable the master role")
2400
2401     try:
2402       cluster = self.cfg.GetClusterInfo()
2403       cluster.cluster_name = clustername
2404       cluster.master_ip = ip
2405       self.cfg.Update(cluster, feedback_fn)
2406
2407       # update the known hosts file
2408       ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2409       node_list = self.cfg.GetNodeList()
2410       try:
2411         node_list.remove(master)
2412       except ValueError:
2413         pass
2414       result = self.rpc.call_upload_file(node_list,
2415                                          constants.SSH_KNOWN_HOSTS_FILE)
2416       for to_node, to_result in result.iteritems():
2417         msg = to_result.fail_msg
2418         if msg:
2419           msg = ("Copy of file %s to node %s failed: %s" %
2420                  (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2421           self.proc.LogWarning(msg)
2422
2423     finally:
2424       result = self.rpc.call_node_start_master(master, False, False)
2425       msg = result.fail_msg
2426       if msg:
2427         self.LogWarning("Could not re-enable the master role on"
2428                         " the master, please restart manually: %s", msg)
2429
2430     return clustername
2431
2432
2433 class LUSetClusterParams(LogicalUnit):
2434   """Change the parameters of the cluster.
2435
2436   """
2437   HPATH = "cluster-modify"
2438   HTYPE = constants.HTYPE_CLUSTER
2439   _OP_PARAMS = [
2440     ("vg_name", None, ht.TMaybeString),
2441     ("enabled_hypervisors", None,
2442      ht.TOr(ht.TAnd(ht.TListOf(ht.TElemOf(constants.HYPER_TYPES)), ht.TTrue),
2443             ht.TNone)),
2444     ("hvparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2445                               ht.TNone)),
2446     ("beparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2447                               ht.TNone)),
2448     ("os_hvp", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2449                             ht.TNone)),
2450     ("osparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2451                               ht.TNone)),
2452     ("candidate_pool_size", None, ht.TOr(ht.TStrictPositiveInt, ht.TNone)),
2453     ("uid_pool", None, ht.NoType),
2454     ("add_uids", None, ht.NoType),
2455     ("remove_uids", None, ht.NoType),
2456     ("maintain_node_health", None, ht.TMaybeBool),
2457     ("nicparams", None, ht.TOr(ht.TDict, ht.TNone)),
2458     ("drbd_helper", None, ht.TOr(ht.TString, ht.TNone)),
2459     ("default_iallocator", None, ht.TOr(ht.TString, ht.TNone)),
2460     ("reserved_lvs", None, ht.TOr(ht.TListOf(ht.TNonEmptyString), ht.TNone)),
2461     ("hidden_os", None, ht.TOr(ht.TListOf(\
2462           ht.TAnd(ht.TList,
2463                 ht.TIsLength(2),
2464                 ht.TMap(lambda v: v[0], ht.TElemOf(constants.DDMS_VALUES)))),
2465           ht.TNone)),
2466     ("blacklisted_os", None, ht.TOr(ht.TListOf(\
2467           ht.TAnd(ht.TList,
2468                 ht.TIsLength(2),
2469                 ht.TMap(lambda v: v[0], ht.TElemOf(constants.DDMS_VALUES)))),
2470           ht.TNone)),
2471     ]
2472   REQ_BGL = False
2473
2474   def CheckArguments(self):
2475     """Check parameters
2476
2477     """
2478     if self.op.uid_pool:
2479       uidpool.CheckUidPool(self.op.uid_pool)
2480
2481     if self.op.add_uids:
2482       uidpool.CheckUidPool(self.op.add_uids)
2483
2484     if self.op.remove_uids:
2485       uidpool.CheckUidPool(self.op.remove_uids)
2486
2487   def ExpandNames(self):
2488     # FIXME: in the future maybe other cluster params won't require checking on
2489     # all nodes to be modified.
2490     self.needed_locks = {
2491       locking.LEVEL_NODE: locking.ALL_SET,
2492     }
2493     self.share_locks[locking.LEVEL_NODE] = 1
2494
2495   def BuildHooksEnv(self):
2496     """Build hooks env.
2497
2498     """
2499     env = {
2500       "OP_TARGET": self.cfg.GetClusterName(),
2501       "NEW_VG_NAME": self.op.vg_name,
2502       }
2503     mn = self.cfg.GetMasterNode()
2504     return env, [mn], [mn]
2505
2506   def CheckPrereq(self):
2507     """Check prerequisites.
2508
2509     This checks whether the given params don't conflict and
2510     if the given volume group is valid.
2511
2512     """
2513     if self.op.vg_name is not None and not self.op.vg_name:
2514       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2515         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2516                                    " instances exist", errors.ECODE_INVAL)
2517
2518     if self.op.drbd_helper is not None and not self.op.drbd_helper:
2519       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2520         raise errors.OpPrereqError("Cannot disable drbd helper while"
2521                                    " drbd-based instances exist",
2522                                    errors.ECODE_INVAL)
2523
2524     node_list = self.acquired_locks[locking.LEVEL_NODE]
2525
2526     # if vg_name not None, checks given volume group on all nodes
2527     if self.op.vg_name:
2528       vglist = self.rpc.call_vg_list(node_list)
2529       for node in node_list:
2530         msg = vglist[node].fail_msg
2531         if msg:
2532           # ignoring down node
2533           self.LogWarning("Error while gathering data on node %s"
2534                           " (ignoring node): %s", node, msg)
2535           continue
2536         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2537                                               self.op.vg_name,
2538                                               constants.MIN_VG_SIZE)
2539         if vgstatus:
2540           raise errors.OpPrereqError("Error on node '%s': %s" %
2541                                      (node, vgstatus), errors.ECODE_ENVIRON)
2542
2543     if self.op.drbd_helper:
2544       # checks given drbd helper on all nodes
2545       helpers = self.rpc.call_drbd_helper(node_list)
2546       for node in node_list:
2547         ninfo = self.cfg.GetNodeInfo(node)
2548         if ninfo.offline:
2549           self.LogInfo("Not checking drbd helper on offline node %s", node)
2550           continue
2551         msg = helpers[node].fail_msg
2552         if msg:
2553           raise errors.OpPrereqError("Error checking drbd helper on node"
2554                                      " '%s': %s" % (node, msg),
2555                                      errors.ECODE_ENVIRON)
2556         node_helper = helpers[node].payload
2557         if node_helper != self.op.drbd_helper:
2558           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2559                                      (node, node_helper), errors.ECODE_ENVIRON)
2560
2561     self.cluster = cluster = self.cfg.GetClusterInfo()
2562     # validate params changes
2563     if self.op.beparams:
2564       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2565       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2566
2567     if self.op.nicparams:
2568       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2569       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2570       objects.NIC.CheckParameterSyntax(self.new_nicparams)
2571       nic_errors = []
2572
2573       # check all instances for consistency
2574       for instance in self.cfg.GetAllInstancesInfo().values():
2575         for nic_idx, nic in enumerate(instance.nics):
2576           params_copy = copy.deepcopy(nic.nicparams)
2577           params_filled = objects.FillDict(self.new_nicparams, params_copy)
2578
2579           # check parameter syntax
2580           try:
2581             objects.NIC.CheckParameterSyntax(params_filled)
2582           except errors.ConfigurationError, err:
2583             nic_errors.append("Instance %s, nic/%d: %s" %
2584                               (instance.name, nic_idx, err))
2585
2586           # if we're moving instances to routed, check that they have an ip
2587           target_mode = params_filled[constants.NIC_MODE]
2588           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2589             nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2590                               (instance.name, nic_idx))
2591       if nic_errors:
2592         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2593                                    "\n".join(nic_errors))
2594
2595     # hypervisor list/parameters
2596     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2597     if self.op.hvparams:
2598       for hv_name, hv_dict in self.op.hvparams.items():
2599         if hv_name not in self.new_hvparams:
2600           self.new_hvparams[hv_name] = hv_dict
2601         else:
2602           self.new_hvparams[hv_name].update(hv_dict)
2603
2604     # os hypervisor parameters
2605     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2606     if self.op.os_hvp:
2607       for os_name, hvs in self.op.os_hvp.items():
2608         if os_name not in self.new_os_hvp:
2609           self.new_os_hvp[os_name] = hvs
2610         else:
2611           for hv_name, hv_dict in hvs.items():
2612             if hv_name not in self.new_os_hvp[os_name]:
2613               self.new_os_hvp[os_name][hv_name] = hv_dict
2614             else:
2615               self.new_os_hvp[os_name][hv_name].update(hv_dict)
2616
2617     # os parameters
2618     self.new_osp = objects.FillDict(cluster.osparams, {})
2619     if self.op.osparams:
2620       for os_name, osp in self.op.osparams.items():
2621         if os_name not in self.new_osp:
2622           self.new_osp[os_name] = {}
2623
2624         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2625                                                   use_none=True)
2626
2627         if not self.new_osp[os_name]:
2628           # we removed all parameters
2629           del self.new_osp[os_name]
2630         else:
2631           # check the parameter validity (remote check)
2632           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2633                          os_name, self.new_osp[os_name])
2634
2635     # changes to the hypervisor list
2636     if self.op.enabled_hypervisors is not None:
2637       self.hv_list = self.op.enabled_hypervisors
2638       for hv in self.hv_list:
2639         # if the hypervisor doesn't already exist in the cluster
2640         # hvparams, we initialize it to empty, and then (in both
2641         # cases) we make sure to fill the defaults, as we might not
2642         # have a complete defaults list if the hypervisor wasn't
2643         # enabled before
2644         if hv not in new_hvp:
2645           new_hvp[hv] = {}
2646         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2647         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2648     else:
2649       self.hv_list = cluster.enabled_hypervisors
2650
2651     if self.op.hvparams or self.op.enabled_hypervisors is not None:
2652       # either the enabled list has changed, or the parameters have, validate
2653       for hv_name, hv_params in self.new_hvparams.items():
2654         if ((self.op.hvparams and hv_name in self.op.hvparams) or
2655             (self.op.enabled_hypervisors and
2656              hv_name in self.op.enabled_hypervisors)):
2657           # either this is a new hypervisor, or its parameters have changed
2658           hv_class = hypervisor.GetHypervisor(hv_name)
2659           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2660           hv_class.CheckParameterSyntax(hv_params)
2661           _CheckHVParams(self, node_list, hv_name, hv_params)
2662
2663     if self.op.os_hvp:
2664       # no need to check any newly-enabled hypervisors, since the
2665       # defaults have already been checked in the above code-block
2666       for os_name, os_hvp in self.new_os_hvp.items():
2667         for hv_name, hv_params in os_hvp.items():
2668           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2669           # we need to fill in the new os_hvp on top of the actual hv_p
2670           cluster_defaults = self.new_hvparams.get(hv_name, {})
2671           new_osp = objects.FillDict(cluster_defaults, hv_params)
2672           hv_class = hypervisor.GetHypervisor(hv_name)
2673           hv_class.CheckParameterSyntax(new_osp)
2674           _CheckHVParams(self, node_list, hv_name, new_osp)
2675
2676     if self.op.default_iallocator:
2677       alloc_script = utils.FindFile(self.op.default_iallocator,
2678                                     constants.IALLOCATOR_SEARCH_PATH,
2679                                     os.path.isfile)
2680       if alloc_script is None:
2681         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2682                                    " specified" % self.op.default_iallocator,
2683                                    errors.ECODE_INVAL)
2684
2685   def Exec(self, feedback_fn):
2686     """Change the parameters of the cluster.
2687
2688     """
2689     if self.op.vg_name is not None:
2690       new_volume = self.op.vg_name
2691       if not new_volume:
2692         new_volume = None
2693       if new_volume != self.cfg.GetVGName():
2694         self.cfg.SetVGName(new_volume)
2695       else:
2696         feedback_fn("Cluster LVM configuration already in desired"
2697                     " state, not changing")
2698     if self.op.drbd_helper is not None:
2699       new_helper = self.op.drbd_helper
2700       if not new_helper:
2701         new_helper = None
2702       if new_helper != self.cfg.GetDRBDHelper():
2703         self.cfg.SetDRBDHelper(new_helper)
2704       else:
2705         feedback_fn("Cluster DRBD helper already in desired state,"
2706                     " not changing")
2707     if self.op.hvparams:
2708       self.cluster.hvparams = self.new_hvparams
2709     if self.op.os_hvp:
2710       self.cluster.os_hvp = self.new_os_hvp
2711     if self.op.enabled_hypervisors is not None:
2712       self.cluster.hvparams = self.new_hvparams
2713       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2714     if self.op.beparams:
2715       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2716     if self.op.nicparams:
2717       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2718     if self.op.osparams:
2719       self.cluster.osparams = self.new_osp
2720
2721     if self.op.candidate_pool_size is not None:
2722       self.cluster.candidate_pool_size = self.op.candidate_pool_size
2723       # we need to update the pool size here, otherwise the save will fail
2724       _AdjustCandidatePool(self, [])
2725
2726     if self.op.maintain_node_health is not None:
2727       self.cluster.maintain_node_health = self.op.maintain_node_health
2728
2729     if self.op.add_uids is not None:
2730       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2731
2732     if self.op.remove_uids is not None:
2733       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2734
2735     if self.op.uid_pool is not None:
2736       self.cluster.uid_pool = self.op.uid_pool
2737
2738     if self.op.default_iallocator is not None:
2739       self.cluster.default_iallocator = self.op.default_iallocator
2740
2741     if self.op.reserved_lvs is not None:
2742       self.cluster.reserved_lvs = self.op.reserved_lvs
2743
2744     def helper_os(aname, mods, desc):
2745       desc += " OS list"
2746       lst = getattr(self.cluster, aname)
2747       for key, val in mods:
2748         if key == constants.DDM_ADD:
2749           if val in lst:
2750             feedback_fn("OS %s already in %s, ignoring", val, desc)
2751           else:
2752             lst.append(val)
2753         elif key == constants.DDM_REMOVE:
2754           if val in lst:
2755             lst.remove(val)
2756           else:
2757             feedback_fn("OS %s not found in %s, ignoring", val, desc)
2758         else:
2759           raise errors.ProgrammerError("Invalid modification '%s'" % key)
2760
2761     if self.op.hidden_os:
2762       helper_os("hidden_os", self.op.hidden_os, "hidden")
2763
2764     if self.op.blacklisted_os:
2765       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
2766
2767     self.cfg.Update(self.cluster, feedback_fn)
2768
2769
2770 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2771   """Distribute additional files which are part of the cluster configuration.
2772
2773   ConfigWriter takes care of distributing the config and ssconf files, but
2774   there are more files which should be distributed to all nodes. This function
2775   makes sure those are copied.
2776
2777   @param lu: calling logical unit
2778   @param additional_nodes: list of nodes not in the config to distribute to
2779
2780   """
2781   # 1. Gather target nodes
2782   myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2783   dist_nodes = lu.cfg.GetOnlineNodeList()
2784   if additional_nodes is not None:
2785     dist_nodes.extend(additional_nodes)
2786   if myself.name in dist_nodes:
2787     dist_nodes.remove(myself.name)
2788
2789   # 2. Gather files to distribute
2790   dist_files = set([constants.ETC_HOSTS,
2791                     constants.SSH_KNOWN_HOSTS_FILE,
2792                     constants.RAPI_CERT_FILE,
2793                     constants.RAPI_USERS_FILE,
2794                     constants.CONFD_HMAC_KEY,
2795                     constants.CLUSTER_DOMAIN_SECRET_FILE,
2796                    ])
2797
2798   enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2799   for hv_name in enabled_hypervisors:
2800     hv_class = hypervisor.GetHypervisor(hv_name)
2801     dist_files.update(hv_class.GetAncillaryFiles())
2802
2803   # 3. Perform the files upload
2804   for fname in dist_files:
2805     if os.path.exists(fname):
2806       result = lu.rpc.call_upload_file(dist_nodes, fname)
2807       for to_node, to_result in result.items():
2808         msg = to_result.fail_msg
2809         if msg:
2810           msg = ("Copy of file %s to node %s failed: %s" %
2811                  (fname, to_node, msg))
2812           lu.proc.LogWarning(msg)
2813
2814
2815 class LURedistributeConfig(NoHooksLU):
2816   """Force the redistribution of cluster configuration.
2817
2818   This is a very simple LU.
2819
2820   """
2821   REQ_BGL = False
2822
2823   def ExpandNames(self):
2824     self.needed_locks = {
2825       locking.LEVEL_NODE: locking.ALL_SET,
2826     }
2827     self.share_locks[locking.LEVEL_NODE] = 1
2828
2829   def Exec(self, feedback_fn):
2830     """Redistribute the configuration.
2831
2832     """
2833     self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2834     _RedistributeAncillaryFiles(self)
2835
2836
2837 def _WaitForSync(lu, instance, disks=None, oneshot=False):
2838   """Sleep and poll for an instance's disk to sync.
2839
2840   """
2841   if not instance.disks or disks is not None and not disks:
2842     return True
2843
2844   disks = _ExpandCheckDisks(instance, disks)
2845
2846   if not oneshot:
2847     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2848
2849   node = instance.primary_node
2850
2851   for dev in disks:
2852     lu.cfg.SetDiskID(dev, node)
2853
2854   # TODO: Convert to utils.Retry
2855
2856   retries = 0
2857   degr_retries = 10 # in seconds, as we sleep 1 second each time
2858   while True:
2859     max_time = 0
2860     done = True
2861     cumul_degraded = False
2862     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
2863     msg = rstats.fail_msg
2864     if msg:
2865       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2866       retries += 1
2867       if retries >= 10:
2868         raise errors.RemoteError("Can't contact node %s for mirror data,"
2869                                  " aborting." % node)
2870       time.sleep(6)
2871       continue
2872     rstats = rstats.payload
2873     retries = 0
2874     for i, mstat in enumerate(rstats):
2875       if mstat is None:
2876         lu.LogWarning("Can't compute data for node %s/%s",
2877                            node, disks[i].iv_name)
2878         continue
2879
2880       cumul_degraded = (cumul_degraded or
2881                         (mstat.is_degraded and mstat.sync_percent is None))
2882       if mstat.sync_percent is not None:
2883         done = False
2884         if mstat.estimated_time is not None:
2885           rem_time = ("%s remaining (estimated)" %
2886                       utils.FormatSeconds(mstat.estimated_time))
2887           max_time = mstat.estimated_time
2888         else:
2889           rem_time = "no time estimate"
2890         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2891                         (disks[i].iv_name, mstat.sync_percent, rem_time))
2892
2893     # if we're done but degraded, let's do a few small retries, to
2894     # make sure we see a stable and not transient situation; therefore
2895     # we force restart of the loop
2896     if (done or oneshot) and cumul_degraded and degr_retries > 0:
2897       logging.info("Degraded disks found, %d retries left", degr_retries)
2898       degr_retries -= 1
2899       time.sleep(1)
2900       continue
2901
2902     if done or oneshot:
2903       break
2904
2905     time.sleep(min(60, max_time))
2906
2907   if done:
2908     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2909   return not cumul_degraded
2910
2911
2912 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2913   """Check that mirrors are not degraded.
2914
2915   The ldisk parameter, if True, will change the test from the
2916   is_degraded attribute (which represents overall non-ok status for
2917   the device(s)) to the ldisk (representing the local storage status).
2918
2919   """
2920   lu.cfg.SetDiskID(dev, node)
2921
2922   result = True
2923
2924   if on_primary or dev.AssembleOnSecondary():
2925     rstats = lu.rpc.call_blockdev_find(node, dev)
2926     msg = rstats.fail_msg
2927     if msg:
2928       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2929       result = False
2930     elif not rstats.payload:
2931       lu.LogWarning("Can't find disk on node %s", node)
2932       result = False
2933     else:
2934       if ldisk:
2935         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2936       else:
2937         result = result and not rstats.payload.is_degraded
2938
2939   if dev.children:
2940     for child in dev.children:
2941       result = result and _CheckDiskConsistency(lu, child, node, on_primary)
2942
2943   return result
2944
2945
2946 class LUDiagnoseOS(NoHooksLU):
2947   """Logical unit for OS diagnose/query.
2948
2949   """
2950   _OP_PARAMS = [
2951     _POutputFields,
2952     ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
2953     ]
2954   REQ_BGL = False
2955   _HID = "hidden"
2956   _BLK = "blacklisted"
2957   _VLD = "valid"
2958   _FIELDS_STATIC = utils.FieldSet()
2959   _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
2960                                    "parameters", "api_versions", _HID, _BLK)
2961
2962   def CheckArguments(self):
2963     if self.op.names:
2964       raise errors.OpPrereqError("Selective OS query not supported",
2965                                  errors.ECODE_INVAL)
2966
2967     _CheckOutputFields(static=self._FIELDS_STATIC,
2968                        dynamic=self._FIELDS_DYNAMIC,
2969                        selected=self.op.output_fields)
2970
2971   def ExpandNames(self):
2972     # Lock all nodes, in shared mode
2973     # Temporary removal of locks, should be reverted later
2974     # TODO: reintroduce locks when they are lighter-weight
2975     self.needed_locks = {}
2976     #self.share_locks[locking.LEVEL_NODE] = 1
2977     #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2978
2979   @staticmethod
2980   def _DiagnoseByOS(rlist):
2981     """Remaps a per-node return list into an a per-os per-node dictionary
2982
2983     @param rlist: a map with node names as keys and OS objects as values
2984
2985     @rtype: dict
2986     @return: a dictionary with osnames as keys and as value another
2987         map, with nodes as keys and tuples of (path, status, diagnose,
2988         variants, parameters, api_versions) as values, eg::
2989
2990           {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
2991                                      (/srv/..., False, "invalid api")],
2992                            "node2": [(/srv/..., True, "", [], [])]}
2993           }
2994
2995     """
2996     all_os = {}
2997     # we build here the list of nodes that didn't fail the RPC (at RPC
2998     # level), so that nodes with a non-responding node daemon don't
2999     # make all OSes invalid
3000     good_nodes = [node_name for node_name in rlist
3001                   if not rlist[node_name].fail_msg]
3002     for node_name, nr in rlist.items():
3003       if nr.fail_msg or not nr.payload:
3004         continue
3005       for (name, path, status, diagnose, variants,
3006            params, api_versions) in nr.payload:
3007         if name not in all_os:
3008           # build a list of nodes for this os containing empty lists
3009           # for each node in node_list
3010           all_os[name] = {}
3011           for nname in good_nodes:
3012             all_os[name][nname] = []
3013         # convert params from [name, help] to (name, help)
3014         params = [tuple(v) for v in params]
3015         all_os[name][node_name].append((path, status, diagnose,
3016                                         variants, params, api_versions))
3017     return all_os
3018
3019   def Exec(self, feedback_fn):
3020     """Compute the list of OSes.
3021
3022     """
3023     valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
3024     node_data = self.rpc.call_os_diagnose(valid_nodes)
3025     pol = self._DiagnoseByOS(node_data)
3026     output = []
3027     cluster = self.cfg.GetClusterInfo()
3028
3029     for os_name in utils.NiceSort(pol.keys()):
3030       os_data = pol[os_name]
3031       row = []
3032       valid = True
3033       (variants, params, api_versions) = null_state = (set(), set(), set())
3034       for idx, osl in enumerate(os_data.values()):
3035         valid = bool(valid and osl and osl[0][1])
3036         if not valid:
3037           (variants, params, api_versions) = null_state
3038           break
3039         node_variants, node_params, node_api = osl[0][3:6]
3040         if idx == 0: # first entry
3041           variants = set(node_variants)
3042           params = set(node_params)
3043           api_versions = set(node_api)
3044         else: # keep consistency
3045           variants.intersection_update(node_variants)
3046           params.intersection_update(node_params)
3047           api_versions.intersection_update(node_api)
3048
3049       is_hid = os_name in cluster.hidden_os
3050       is_blk = os_name in cluster.blacklisted_os
3051       if ((self._HID not in self.op.output_fields and is_hid) or
3052           (self._BLK not in self.op.output_fields and is_blk) or
3053           (self._VLD not in self.op.output_fields and not valid)):
3054         continue
3055
3056       for field in self.op.output_fields:
3057         if field == "name":
3058           val = os_name
3059         elif field == self._VLD:
3060           val = valid
3061         elif field == "node_status":
3062           # this is just a copy of the dict
3063           val = {}
3064           for node_name, nos_list in os_data.items():
3065             val[node_name] = nos_list
3066         elif field == "variants":
3067           val = utils.NiceSort(list(variants))
3068         elif field == "parameters":
3069           val = list(params)
3070         elif field == "api_versions":
3071           val = list(api_versions)
3072         elif field == self._HID:
3073           val = is_hid
3074         elif field == self._BLK:
3075           val = is_blk
3076         else:
3077           raise errors.ParameterError(field)
3078         row.append(val)
3079       output.append(row)
3080
3081     return output
3082
3083
3084 class LURemoveNode(LogicalUnit):
3085   """Logical unit for removing a node.
3086
3087   """
3088   HPATH = "node-remove"
3089   HTYPE = constants.HTYPE_NODE
3090   _OP_PARAMS = [
3091     _PNodeName,
3092     ]
3093
3094   def BuildHooksEnv(self):
3095     """Build hooks env.
3096
3097     This doesn't run on the target node in the pre phase as a failed
3098     node would then be impossible to remove.
3099
3100     """
3101     env = {
3102       "OP_TARGET": self.op.node_name,
3103       "NODE_NAME": self.op.node_name,
3104       }
3105     all_nodes = self.cfg.GetNodeList()
3106     try:
3107       all_nodes.remove(self.op.node_name)
3108     except ValueError:
3109       logging.warning("Node %s which is about to be removed not found"
3110                       " in the all nodes list", self.op.node_name)
3111     return env, all_nodes, all_nodes
3112
3113   def CheckPrereq(self):
3114     """Check prerequisites.
3115
3116     This checks:
3117      - the node exists in the configuration
3118      - it does not have primary or secondary instances
3119      - it's not the master
3120
3121     Any errors are signaled by raising errors.OpPrereqError.
3122
3123     """
3124     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3125     node = self.cfg.GetNodeInfo(self.op.node_name)
3126     assert node is not None
3127
3128     instance_list = self.cfg.GetInstanceList()
3129
3130     masternode = self.cfg.GetMasterNode()
3131     if node.name == masternode:
3132       raise errors.OpPrereqError("Node is the master node,"
3133                                  " you need to failover first.",
3134                                  errors.ECODE_INVAL)
3135
3136     for instance_name in instance_list:
3137       instance = self.cfg.GetInstanceInfo(instance_name)
3138       if node.name in instance.all_nodes:
3139         raise errors.OpPrereqError("Instance %s is still running on the node,"
3140                                    " please remove first." % instance_name,
3141                                    errors.ECODE_INVAL)
3142     self.op.node_name = node.name
3143     self.node = node
3144
3145   def Exec(self, feedback_fn):
3146     """Removes the node from the cluster.
3147
3148     """
3149     node = self.node
3150     logging.info("Stopping the node daemon and removing configs from node %s",
3151                  node.name)
3152
3153     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3154
3155     # Promote nodes to master candidate as needed
3156     _AdjustCandidatePool(self, exceptions=[node.name])
3157     self.context.RemoveNode(node.name)
3158
3159     # Run post hooks on the node before it's removed
3160     hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3161     try:
3162       hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3163     except:
3164       # pylint: disable-msg=W0702
3165       self.LogWarning("Errors occurred running hooks on %s" % node.name)
3166
3167     result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3168     msg = result.fail_msg
3169     if msg:
3170       self.LogWarning("Errors encountered on the remote node while leaving"
3171                       " the cluster: %s", msg)
3172
3173     # Remove node from our /etc/hosts
3174     if self.cfg.GetClusterInfo().modify_etc_hosts:
3175       master_node = self.cfg.GetMasterNode()
3176       result = self.rpc.call_etc_hosts_modify(master_node,
3177                                               constants.ETC_HOSTS_REMOVE,
3178                                               node.name, None)
3179       result.Raise("Can't update hosts file with new host data")
3180       _RedistributeAncillaryFiles(self)
3181
3182
3183 class LUQueryNodes(NoHooksLU):
3184   """Logical unit for querying nodes.
3185
3186   """
3187   # pylint: disable-msg=W0142
3188   _OP_PARAMS = [
3189     _POutputFields,
3190     ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
3191     ("use_locking", False, ht.TBool),
3192     ]
3193   REQ_BGL = False
3194
3195   _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
3196                     "master_candidate", "offline", "drained"]
3197
3198   _FIELDS_DYNAMIC = utils.FieldSet(
3199     "dtotal", "dfree",
3200     "mtotal", "mnode", "mfree",
3201     "bootid",
3202     "ctotal", "cnodes", "csockets",
3203     )
3204
3205   _FIELDS_STATIC = utils.FieldSet(*[
3206     "pinst_cnt", "sinst_cnt",
3207     "pinst_list", "sinst_list",
3208     "pip", "sip", "tags",
3209     "master",
3210     "role"] + _SIMPLE_FIELDS
3211     )
3212
3213   def CheckArguments(self):
3214     _CheckOutputFields(static=self._FIELDS_STATIC,
3215                        dynamic=self._FIELDS_DYNAMIC,
3216                        selected=self.op.output_fields)
3217
3218   def ExpandNames(self):
3219     self.needed_locks = {}
3220     self.share_locks[locking.LEVEL_NODE] = 1
3221
3222     if self.op.names:
3223       self.wanted = _GetWantedNodes(self, self.op.names)
3224     else:
3225       self.wanted = locking.ALL_SET
3226
3227     self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3228     self.do_locking = self.do_node_query and self.op.use_locking
3229     if self.do_locking:
3230       # if we don't request only static fields, we need to lock the nodes
3231       self.needed_locks[locking.LEVEL_NODE] = self.wanted
3232
3233   def Exec(self, feedback_fn):
3234     """Computes the list of nodes and their attributes.
3235
3236     """
3237     all_info = self.cfg.GetAllNodesInfo()
3238     if self.do_locking:
3239       nodenames = self.acquired_locks[locking.LEVEL_NODE]
3240     elif self.wanted != locking.ALL_SET:
3241       nodenames = self.wanted
3242       missing = set(nodenames).difference(all_info.keys())
3243       if missing:
3244         raise errors.OpExecError(
3245           "Some nodes were removed before retrieving their data: %s" % missing)
3246     else:
3247       nodenames = all_info.keys()
3248
3249     nodenames = utils.NiceSort(nodenames)
3250     nodelist = [all_info[name] for name in nodenames]
3251
3252     # begin data gathering
3253
3254     if self.do_node_query:
3255       live_data = {}
3256       node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
3257                                           self.cfg.GetHypervisorType())
3258       for name in nodenames:
3259         nodeinfo = node_data[name]
3260         if not nodeinfo.fail_msg and nodeinfo.payload:
3261           nodeinfo = nodeinfo.payload
3262           fn = utils.TryConvert
3263           live_data[name] = {
3264             "mtotal": fn(int, nodeinfo.get('memory_total', None)),
3265             "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
3266             "mfree": fn(int, nodeinfo.get('memory_free', None)),
3267             "dtotal": fn(int, nodeinfo.get('vg_size', None)),
3268             "dfree": fn(int, nodeinfo.get('vg_free', None)),
3269             "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
3270             "bootid": nodeinfo.get('bootid', None),
3271             "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
3272             "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
3273             }
3274         else:
3275           live_data[name] = {}
3276     else:
3277       live_data = dict.fromkeys(nodenames, {})
3278
3279     node_to_primary = dict([(name, set()) for name in nodenames])
3280     node_to_secondary = dict([(name, set()) for name in nodenames])
3281
3282     inst_fields = frozenset(("pinst_cnt", "pinst_list",
3283                              "sinst_cnt", "sinst_list"))
3284     if inst_fields & frozenset(self.op.output_fields):
3285       inst_data = self.cfg.GetAllInstancesInfo()
3286
3287       for inst in inst_data.values():
3288         if inst.primary_node in node_to_primary:
3289           node_to_primary[inst.primary_node].add(inst.name)
3290         for secnode in inst.secondary_nodes:
3291           if secnode in node_to_secondary:
3292             node_to_secondary[secnode].add(inst.name)
3293
3294     master_node = self.cfg.GetMasterNode()
3295
3296     # end data gathering
3297
3298     output = []
3299     for node in nodelist:
3300       node_output = []
3301       for field in self.op.output_fields:
3302         if field in self._SIMPLE_FIELDS:
3303           val = getattr(node, field)
3304         elif field == "pinst_list":
3305           val = list(node_to_primary[node.name])
3306         elif field == "sinst_list":
3307           val = list(node_to_secondary[node.name])
3308         elif field == "pinst_cnt":
3309           val = len(node_to_primary[node.name])
3310         elif field == "sinst_cnt":
3311           val = len(node_to_secondary[node.name])
3312         elif field == "pip":
3313           val = node.primary_ip
3314         elif field == "sip":
3315           val = node.secondary_ip
3316         elif field == "tags":
3317           val = list(node.GetTags())
3318         elif field == "master":
3319           val = node.name == master_node
3320         elif self._FIELDS_DYNAMIC.Matches(field):
3321           val = live_data[node.name].get(field, None)
3322         elif field == "role":
3323           if node.name == master_node:
3324             val = "M"
3325           elif node.master_candidate:
3326             val = "C"
3327           elif node.drained:
3328             val = "D"
3329           elif node.offline:
3330             val = "O"
3331           else:
3332             val = "R"
3333         else:
3334           raise errors.ParameterError(field)
3335         node_output.append(val)
3336       output.append(node_output)
3337
3338     return output
3339
3340
3341 class LUQueryNodeVolumes(NoHooksLU):
3342   """Logical unit for getting volumes on node(s).
3343
3344   """
3345   _OP_PARAMS = [
3346     ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
3347     ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
3348     ]
3349   REQ_BGL = False
3350   _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3351   _FIELDS_STATIC = utils.FieldSet("node")
3352
3353   def CheckArguments(self):
3354     _CheckOutputFields(static=self._FIELDS_STATIC,
3355                        dynamic=self._FIELDS_DYNAMIC,
3356                        selected=self.op.output_fields)
3357
3358   def ExpandNames(self):
3359     self.needed_locks = {}
3360     self.share_locks[locking.LEVEL_NODE] = 1
3361     if not self.op.nodes:
3362       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3363     else:
3364       self.needed_locks[locking.LEVEL_NODE] = \
3365         _GetWantedNodes(self, self.op.nodes)
3366
3367   def Exec(self, feedback_fn):
3368     """Computes the list of nodes and their attributes.
3369
3370     """
3371     nodenames = self.acquired_locks[locking.LEVEL_NODE]
3372     volumes = self.rpc.call_node_volumes(nodenames)
3373
3374     ilist = [self.cfg.GetInstanceInfo(iname) for iname
3375              in self.cfg.GetInstanceList()]
3376
3377     lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3378
3379     output = []
3380     for node in nodenames:
3381       nresult = volumes[node]
3382       if nresult.offline:
3383         continue
3384       msg = nresult.fail_msg
3385       if msg:
3386         self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3387         continue
3388
3389       node_vols = nresult.payload[:]
3390       node_vols.sort(key=lambda vol: vol['dev'])
3391
3392       for vol in node_vols:
3393         node_output = []
3394         for field in self.op.output_fields:
3395           if field == "node":
3396             val = node
3397           elif field == "phys":
3398             val = vol['dev']
3399           elif field == "vg":
3400             val = vol['vg']
3401           elif field == "name":
3402             val = vol['name']
3403           elif field == "size":
3404             val = int(float(vol['size']))
3405           elif field == "instance":
3406             for inst in ilist:
3407               if node not in lv_by_node[inst]:
3408                 continue
3409               if vol['name'] in lv_by_node[inst][node]:
3410                 val = inst.name
3411                 break
3412             else:
3413               val = '-'
3414           else:
3415             raise errors.ParameterError(field)
3416           node_output.append(str(val))
3417
3418         output.append(node_output)
3419
3420     return output
3421
3422
3423 class LUQueryNodeStorage(NoHooksLU):
3424   """Logical unit for getting information on storage units on node(s).
3425
3426   """
3427   _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3428   _OP_PARAMS = [
3429     ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
3430     ("storage_type", ht.NoDefault, _CheckStorageType),
3431     ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
3432     ("name", None, ht.TMaybeString),
3433     ]
3434   REQ_BGL = False
3435
3436   def CheckArguments(self):
3437     _CheckOutputFields(static=self._FIELDS_STATIC,
3438                        dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3439                        selected=self.op.output_fields)
3440
3441   def ExpandNames(self):
3442     self.needed_locks = {}
3443     self.share_locks[locking.LEVEL_NODE] = 1
3444
3445     if self.op.nodes:
3446       self.needed_locks[locking.LEVEL_NODE] = \
3447         _GetWantedNodes(self, self.op.nodes)
3448     else:
3449       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3450
3451   def Exec(self, feedback_fn):
3452     """Computes the list of nodes and their attributes.
3453
3454     """
3455     self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3456
3457     # Always get name to sort by
3458     if constants.SF_NAME in self.op.output_fields:
3459       fields = self.op.output_fields[:]
3460     else:
3461       fields = [constants.SF_NAME] + self.op.output_fields
3462
3463     # Never ask for node or type as it's only known to the LU
3464     for extra in [constants.SF_NODE, constants.SF_TYPE]:
3465       while extra in fields:
3466         fields.remove(extra)
3467
3468     field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3469     name_idx = field_idx[constants.SF_NAME]
3470
3471     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3472     data = self.rpc.call_storage_list(self.nodes,
3473                                       self.op.storage_type, st_args,
3474                                       self.op.name, fields)
3475
3476     result = []
3477
3478     for node in utils.NiceSort(self.nodes):
3479       nresult = data[node]
3480       if nresult.offline:
3481         continue
3482
3483       msg = nresult.fail_msg
3484       if msg:
3485         self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3486         continue
3487
3488       rows = dict([(row[name_idx], row) for row in nresult.payload])
3489
3490       for name in utils.NiceSort(rows.keys()):
3491         row = rows[name]
3492
3493         out = []
3494
3495         for field in self.op.output_fields:
3496           if field == constants.SF_NODE:
3497             val = node
3498           elif field == constants.SF_TYPE:
3499             val = self.op.storage_type
3500           elif field in field_idx:
3501             val = row[field_idx[field]]
3502           else:
3503             raise errors.ParameterError(field)
3504
3505           out.append(val)
3506
3507         result.append(out)
3508
3509     return result
3510
3511
3512 class LUModifyNodeStorage(NoHooksLU):
3513   """Logical unit for modifying a storage volume on a node.
3514
3515   """
3516   _OP_PARAMS = [
3517     _PNodeName,
3518     ("storage_type", ht.NoDefault, _CheckStorageType),
3519     ("name", ht.NoDefault, ht.TNonEmptyString),
3520     ("changes", ht.NoDefault, ht.TDict),
3521     ]
3522   REQ_BGL = False
3523
3524   def CheckArguments(self):
3525     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3526
3527     storage_type = self.op.storage_type
3528
3529     try:
3530       modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3531     except KeyError:
3532       raise errors.OpPrereqError("Storage units of type '%s' can not be"
3533                                  " modified" % storage_type,
3534                                  errors.ECODE_INVAL)
3535
3536     diff = set(self.op.changes.keys()) - modifiable
3537     if diff:
3538       raise errors.OpPrereqError("The following fields can not be modified for"
3539                                  " storage units of type '%s': %r" %
3540                                  (storage_type, list(diff)),
3541                                  errors.ECODE_INVAL)
3542
3543   def ExpandNames(self):
3544     self.needed_locks = {
3545       locking.LEVEL_NODE: self.op.node_name,
3546       }
3547
3548   def Exec(self, feedback_fn):
3549     """Computes the list of nodes and their attributes.
3550
3551     """
3552     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3553     result = self.rpc.call_storage_modify(self.op.node_name,
3554                                           self.op.storage_type, st_args,
3555                                           self.op.name, self.op.changes)
3556     result.Raise("Failed to modify storage unit '%s' on %s" %
3557                  (self.op.name, self.op.node_name))
3558
3559
3560 class LUAddNode(LogicalUnit):
3561   """Logical unit for adding node to the cluster.
3562
3563   """
3564   HPATH = "node-add"
3565   HTYPE = constants.HTYPE_NODE
3566   _OP_PARAMS = [
3567     _PNodeName,
3568     ("primary_ip", None, ht.NoType),
3569     ("secondary_ip", None, ht.TMaybeString),
3570     ("readd", False, ht.TBool),
3571     ("nodegroup", None, ht.TMaybeString)
3572     ]
3573
3574   def CheckArguments(self):
3575     self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
3576     # validate/normalize the node name
3577     self.hostname = netutils.GetHostname(name=self.op.node_name,
3578                                          family=self.primary_ip_family)
3579     self.op.node_name = self.hostname.name
3580     if self.op.readd and self.op.nodegroup:
3581       raise errors.OpPrereqError("Cannot pass a nodegroup when a node is"
3582                                  " being readded", errors.ECODE_INVAL)
3583
3584   def BuildHooksEnv(self):
3585     """Build hooks env.
3586
3587     This will run on all nodes before, and on all nodes + the new node after.
3588
3589     """
3590     env = {
3591       "OP_TARGET": self.op.node_name,
3592       "NODE_NAME": self.op.node_name,
3593       "NODE_PIP": self.op.primary_ip,
3594       "NODE_SIP": self.op.secondary_ip,
3595       }
3596     nodes_0 = self.cfg.GetNodeList()
3597     nodes_1 = nodes_0 + [self.op.node_name, ]
3598     return env, nodes_0, nodes_1
3599
3600   def CheckPrereq(self):
3601     """Check prerequisites.
3602
3603     This checks:
3604      - the new node is not already in the config
3605      - it is resolvable
3606      - its parameters (single/dual homed) matches the cluster
3607
3608     Any errors are signaled by raising errors.OpPrereqError.
3609
3610     """
3611     cfg = self.cfg
3612     hostname = self.hostname
3613     node = hostname.name
3614     primary_ip = self.op.primary_ip = hostname.ip
3615     if self.op.secondary_ip is None:
3616       if self.primary_ip_family == netutils.IP6Address.family:
3617         raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
3618                                    " IPv4 address must be given as secondary",
3619                                    errors.ECODE_INVAL)
3620       self.op.secondary_ip = primary_ip
3621
3622     secondary_ip = self.op.secondary_ip
3623     if not netutils.IP4Address.IsValid(secondary_ip):
3624       raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
3625                                  " address" % secondary_ip, errors.ECODE_INVAL)
3626
3627     node_list = cfg.GetNodeList()
3628     if not self.op.readd and node in node_list:
3629       raise errors.OpPrereqError("Node %s is already in the configuration" %
3630                                  node, errors.ECODE_EXISTS)
3631     elif self.op.readd and node not in node_list:
3632       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3633                                  errors.ECODE_NOENT)
3634
3635     self.changed_primary_ip = False
3636
3637     for existing_node_name in node_list:
3638       existing_node = cfg.GetNodeInfo(existing_node_name)
3639
3640       if self.op.readd and node == existing_node_name:
3641         if existing_node.secondary_ip != secondary_ip:
3642           raise errors.OpPrereqError("Readded node doesn't have the same IP"
3643                                      " address configuration as before",
3644                                      errors.ECODE_INVAL)
3645         if existing_node.primary_ip != primary_ip:
3646           self.changed_primary_ip = True
3647
3648         continue
3649
3650       if (existing_node.primary_ip == primary_ip or
3651           existing_node.secondary_ip == primary_ip or
3652           existing_node.primary_ip == secondary_ip or
3653           existing_node.secondary_ip == secondary_ip):
3654         raise errors.OpPrereqError("New node ip address(es) conflict with"
3655                                    " existing node %s" % existing_node.name,
3656                                    errors.ECODE_NOTUNIQUE)
3657
3658     # check that the type of the node (single versus dual homed) is the
3659     # same as for the master
3660     myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3661     master_singlehomed = myself.secondary_ip == myself.primary_ip
3662     newbie_singlehomed = secondary_ip == primary_ip
3663     if master_singlehomed != newbie_singlehomed:
3664       if master_singlehomed:
3665         raise errors.OpPrereqError("The master has no private ip but the"
3666                                    " new node has one",
3667                                    errors.ECODE_INVAL)
3668       else:
3669         raise errors.OpPrereqError("The master has a private ip but the"
3670                                    " new node doesn't have one",
3671                                    errors.ECODE_INVAL)
3672
3673     # checks reachability
3674     if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3675       raise errors.OpPrereqError("Node not reachable by ping",
3676                                  errors.ECODE_ENVIRON)
3677
3678     if not newbie_singlehomed:
3679       # check reachability from my secondary ip to newbie's secondary ip
3680       if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3681                            source=myself.secondary_ip):
3682         raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3683                                    " based ping to noded port",
3684                                    errors.ECODE_ENVIRON)
3685
3686     if self.op.readd:
3687       exceptions = [node]
3688     else:
3689       exceptions = []
3690
3691     self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3692
3693     if self.op.readd:
3694       self.new_node = self.cfg.GetNodeInfo(node)
3695       assert self.new_node is not None, "Can't retrieve locked node %s" % node
3696     else:
3697       nodegroup = cfg.LookupNodeGroup(self.op.nodegroup)
3698       self.new_node = objects.Node(name=node,
3699                                    primary_ip=primary_ip,
3700                                    secondary_ip=secondary_ip,
3701                                    master_candidate=self.master_candidate,
3702                                    offline=False, drained=False,
3703                                    nodegroup=nodegroup)
3704
3705   def Exec(self, feedback_fn):
3706     """Adds the new node to the cluster.
3707
3708     """
3709     new_node = self.new_node
3710     node = new_node.name
3711
3712     # for re-adds, reset the offline/drained/master-candidate flags;
3713     # we need to reset here, otherwise offline would prevent RPC calls
3714     # later in the procedure; this also means that if the re-add
3715     # fails, we are left with a non-offlined, broken node
3716     if self.op.readd:
3717       new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3718       self.LogInfo("Readding a node, the offline/drained flags were reset")
3719       # if we demote the node, we do cleanup later in the procedure
3720       new_node.master_candidate = self.master_candidate
3721       if self.changed_primary_ip:
3722         new_node.primary_ip = self.op.primary_ip
3723
3724     # notify the user about any possible mc promotion
3725     if new_node.master_candidate:
3726       self.LogInfo("Node will be a master candidate")
3727
3728     # check connectivity
3729     result = self.rpc.call_version([node])[node]
3730     result.Raise("Can't get version information from node %s" % node)
3731     if constants.PROTOCOL_VERSION == result.payload:
3732       logging.info("Communication to node %s fine, sw version %s match",
3733                    node, result.payload)
3734     else:
3735       raise errors.OpExecError("Version mismatch master version %s,"
3736                                " node version %s" %
3737                                (constants.PROTOCOL_VERSION, result.payload))
3738
3739     # Add node to our /etc/hosts, and add key to known_hosts
3740     if self.cfg.GetClusterInfo().modify_etc_hosts:
3741       master_node = self.cfg.GetMasterNode()
3742       result = self.rpc.call_etc_hosts_modify(master_node,
3743                                               constants.ETC_HOSTS_ADD,
3744                                               self.hostname.name,
3745                                               self.hostname.ip)
3746       result.Raise("Can't update hosts file with new host data")
3747
3748     if new_node.secondary_ip != new_node.primary_ip:
3749       result = self.rpc.call_node_has_ip_address(new_node.name,
3750                                                  new_node.secondary_ip)
3751       result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3752                    prereq=True, ecode=errors.ECODE_ENVIRON)
3753       if not result.payload:
3754         raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3755                                  " you gave (%s). Please fix and re-run this"
3756                                  " command." % new_node.secondary_ip)
3757
3758     node_verify_list = [self.cfg.GetMasterNode()]
3759     node_verify_param = {
3760       constants.NV_NODELIST: [node],
3761       # TODO: do a node-net-test as well?
3762     }
3763
3764     result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3765                                        self.cfg.GetClusterName())
3766     for verifier in node_verify_list:
3767       result[verifier].Raise("Cannot communicate with node %s" % verifier)
3768       nl_payload = result[verifier].payload[constants.NV_NODELIST]
3769       if nl_payload:
3770         for failed in nl_payload:
3771           feedback_fn("ssh/hostname verification failed"
3772                       " (checking from %s): %s" %
3773                       (verifier, nl_payload[failed]))
3774         raise errors.OpExecError("ssh/hostname verification failed.")
3775
3776     if self.op.readd:
3777       _RedistributeAncillaryFiles(self)
3778       self.context.ReaddNode(new_node)
3779       # make sure we redistribute the config
3780       self.cfg.Update(new_node, feedback_fn)
3781       # and make sure the new node will not have old files around
3782       if not new_node.master_candidate:
3783         result = self.rpc.call_node_demote_from_mc(new_node.name)
3784         msg = result.fail_msg
3785         if msg:
3786           self.LogWarning("Node failed to demote itself from master"
3787                           " candidate status: %s" % msg)
3788     else:
3789       _RedistributeAncillaryFiles(self, additional_nodes=[node])
3790       self.context.AddNode(new_node, self.proc.GetECId())
3791
3792
3793 class LUSetNodeParams(LogicalUnit):
3794   """Modifies the parameters of a node.
3795
3796   """
3797   HPATH = "node-modify"
3798   HTYPE = constants.HTYPE_NODE
3799   _OP_PARAMS = [
3800     _PNodeName,
3801     ("master_candidate", None, ht.TMaybeBool),
3802     ("offline", None, ht.TMaybeBool),
3803     ("drained", None, ht.TMaybeBool),
3804     ("auto_promote", False, ht.TBool),
3805     _PForce,
3806     ]
3807   REQ_BGL = False
3808
3809   def CheckArguments(self):
3810     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3811     all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3812     if all_mods.count(None) == 3:
3813       raise errors.OpPrereqError("Please pass at least one modification",
3814                                  errors.ECODE_INVAL)
3815     if all_mods.count(True) > 1:
3816       raise errors.OpPrereqError("Can't set the node into more than one"
3817                                  " state at the same time",
3818                                  errors.ECODE_INVAL)
3819
3820     # Boolean value that tells us whether we're offlining or draining the node
3821     self.offline_or_drain = (self.op.offline == True or
3822                              self.op.drained == True)
3823     self.deoffline_or_drain = (self.op.offline == False or
3824                                self.op.drained == False)
3825     self.might_demote = (self.op.master_candidate == False or
3826                          self.offline_or_drain)
3827
3828     self.lock_all = self.op.auto_promote and self.might_demote
3829
3830
3831   def ExpandNames(self):
3832     if self.lock_all:
3833       self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3834     else:
3835       self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3836
3837   def BuildHooksEnv(self):
3838     """Build hooks env.
3839
3840     This runs on the master node.
3841
3842     """
3843     env = {
3844       "OP_TARGET": self.op.node_name,
3845       "MASTER_CANDIDATE": str(self.op.master_candidate),
3846       "OFFLINE": str(self.op.offline),
3847       "DRAINED": str(self.op.drained),
3848       }
3849     nl = [self.cfg.GetMasterNode(),
3850           self.op.node_name]
3851     return env, nl, nl
3852
3853   def CheckPrereq(self):
3854     """Check prerequisites.
3855
3856     This only checks the instance list against the existing names.
3857
3858     """
3859     node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3860
3861     if (self.op.master_candidate is not None or
3862         self.op.drained is not None or
3863         self.op.offline is not None):
3864       # we can't change the master's node flags
3865       if self.op.node_name == self.cfg.GetMasterNode():
3866         raise errors.OpPrereqError("The master role can be changed"
3867                                    " only via master-failover",
3868                                    errors.ECODE_INVAL)
3869
3870
3871     if node.master_candidate and self.might_demote and not self.lock_all:
3872       assert not self.op.auto_promote, "auto-promote set but lock_all not"
3873       # check if after removing the current node, we're missing master
3874       # candidates
3875       (mc_remaining, mc_should, _) = \
3876           self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3877       if mc_remaining < mc_should:
3878         raise errors.OpPrereqError("Not enough master candidates, please"
3879                                    " pass auto_promote to allow promotion",
3880                                    errors.ECODE_INVAL)
3881
3882     if (self.op.master_candidate == True and
3883         ((node.offline and not self.op.offline == False) or
3884          (node.drained and not self.op.drained == False))):
3885       raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3886                                  " to master_candidate" % node.name,
3887                                  errors.ECODE_INVAL)
3888
3889     # If we're being deofflined/drained, we'll MC ourself if needed
3890     if (self.deoffline_or_drain and not self.offline_or_drain and not
3891         self.op.master_candidate == True and not node.master_candidate):
3892       self.op.master_candidate = _DecideSelfPromotion(self)
3893       if self.op.master_candidate:
3894         self.LogInfo("Autopromoting node to master candidate")
3895
3896     return
3897
3898   def Exec(self, feedback_fn):
3899     """Modifies a node.
3900
3901     """
3902     node = self.node
3903
3904     result = []
3905     changed_mc = False
3906
3907     if self.op.offline is not None:
3908       node.offline = self.op.offline
3909       result.append(("offline", str(self.op.offline)))
3910       if self.op.offline == True:
3911         if node.master_candidate:
3912           node.master_candidate = False
3913           changed_mc = True
3914           result.append(("master_candidate", "auto-demotion due to offline"))
3915         if node.drained:
3916           node.drained = False
3917           result.append(("drained", "clear drained status due to offline"))
3918
3919     if self.op.master_candidate is not None:
3920       node.master_candidate = self.op.master_candidate
3921       changed_mc = True
3922       result.append(("master_candidate", str(self.op.master_candidate)))
3923       if self.op.master_candidate == False:
3924         rrc = self.rpc.call_node_demote_from_mc(node.name)
3925         msg = rrc.fail_msg
3926         if msg:
3927           self.LogWarning("Node failed to demote itself: %s" % msg)
3928
3929     if self.op.drained is not None:
3930       node.drained = self.op.drained
3931       result.append(("drained", str(self.op.drained)))
3932       if self.op.drained == True:
3933         if node.master_candidate:
3934           node.master_candidate = False
3935           changed_mc = True
3936           result.append(("master_candidate", "auto-demotion due to drain"))
3937           rrc = self.rpc.call_node_demote_from_mc(node.name)
3938           msg = rrc.fail_msg
3939           if msg:
3940             self.LogWarning("Node failed to demote itself: %s" % msg)
3941         if node.offline:
3942           node.offline = False
3943           result.append(("offline", "clear offline status due to drain"))
3944
3945     # we locked all nodes, we adjust the CP before updating this node
3946     if self.lock_all:
3947       _AdjustCandidatePool(self, [node.name])
3948
3949     # this will trigger configuration file update, if needed
3950     self.cfg.Update(node, feedback_fn)
3951
3952     # this will trigger job queue propagation or cleanup
3953     if changed_mc:
3954       self.context.ReaddNode(node)
3955
3956     return result
3957
3958
3959 class LUPowercycleNode(NoHooksLU):
3960   """Powercycles a node.
3961
3962   """
3963   _OP_PARAMS = [
3964     _PNodeName,
3965     _PForce,
3966     ]
3967   REQ_BGL = False
3968
3969   def CheckArguments(self):
3970     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3971     if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
3972       raise errors.OpPrereqError("The node is the master and the force"
3973                                  " parameter was not set",
3974                                  errors.ECODE_INVAL)
3975
3976   def ExpandNames(self):
3977     """Locking for PowercycleNode.
3978
3979     This is a last-resort option and shouldn't block on other
3980     jobs. Therefore, we grab no locks.
3981
3982     """
3983     self.needed_locks = {}
3984
3985   def Exec(self, feedback_fn):
3986     """Reboots a node.
3987
3988     """
3989     result = self.rpc.call_node_powercycle(self.op.node_name,
3990                                            self.cfg.GetHypervisorType())
3991     result.Raise("Failed to schedule the reboot")
3992     return result.payload
3993
3994
3995 class LUQueryClusterInfo(NoHooksLU):
3996   """Query cluster configuration.
3997
3998   """
3999   REQ_BGL = False
4000
4001   def ExpandNames(self):
4002     self.needed_locks = {}
4003
4004   def Exec(self, feedback_fn):
4005     """Return cluster config.
4006
4007     """
4008     cluster = self.cfg.GetClusterInfo()
4009     os_hvp = {}
4010
4011     # Filter just for enabled hypervisors
4012     for os_name, hv_dict in cluster.os_hvp.items():
4013       os_hvp[os_name] = {}
4014       for hv_name, hv_params in hv_dict.items():
4015         if hv_name in cluster.enabled_hypervisors:
4016           os_hvp[os_name][hv_name] = hv_params
4017
4018     # Convert ip_family to ip_version
4019     primary_ip_version = constants.IP4_VERSION
4020     if cluster.primary_ip_family == netutils.IP6Address.family:
4021       primary_ip_version = constants.IP6_VERSION
4022
4023     result = {
4024       "software_version": constants.RELEASE_VERSION,
4025       "protocol_version": constants.PROTOCOL_VERSION,
4026       "config_version": constants.CONFIG_VERSION,
4027       "os_api_version": max(constants.OS_API_VERSIONS),
4028       "export_version": constants.EXPORT_VERSION,
4029       "architecture": (platform.architecture()[0], platform.machine()),
4030       "name": cluster.cluster_name,
4031       "master": cluster.master_node,
4032       "default_hypervisor": cluster.enabled_hypervisors[0],
4033       "enabled_hypervisors": cluster.enabled_hypervisors,
4034       "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4035                         for hypervisor_name in cluster.enabled_hypervisors]),
4036       "os_hvp": os_hvp,
4037       "beparams": cluster.beparams,
4038       "osparams": cluster.osparams,
4039       "nicparams": cluster.nicparams,
4040       "candidate_pool_size": cluster.candidate_pool_size,
4041       "master_netdev": cluster.master_netdev,
4042       "volume_group_name": cluster.volume_group_name,
4043       "drbd_usermode_helper": cluster.drbd_usermode_helper,
4044       "file_storage_dir": cluster.file_storage_dir,
4045       "maintain_node_health": cluster.maintain_node_health,
4046       "ctime": cluster.ctime,
4047       "mtime": cluster.mtime,
4048       "uuid": cluster.uuid,
4049       "tags": list(cluster.GetTags()),
4050       "uid_pool": cluster.uid_pool,
4051       "default_iallocator": cluster.default_iallocator,
4052       "reserved_lvs": cluster.reserved_lvs,
4053       "primary_ip_version": primary_ip_version,
4054       "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
4055       }
4056
4057     return result
4058
4059
4060 class LUQueryConfigValues(NoHooksLU):
4061   """Return configuration values.
4062
4063   """
4064   _OP_PARAMS = [_POutputFields]
4065   REQ_BGL = False
4066   _FIELDS_DYNAMIC = utils.FieldSet()
4067   _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4068                                   "watcher_pause", "volume_group_name")
4069
4070   def CheckArguments(self):
4071     _CheckOutputFields(static=self._FIELDS_STATIC,
4072                        dynamic=self._FIELDS_DYNAMIC,
4073                        selected=self.op.output_fields)
4074
4075   def ExpandNames(self):
4076     self.needed_locks = {}
4077
4078   def Exec(self, feedback_fn):
4079     """Dump a representation of the cluster config to the standard output.
4080
4081     """
4082     values = []
4083     for field in self.op.output_fields:
4084       if field == "cluster_name":
4085         entry = self.cfg.GetClusterName()
4086       elif field == "master_node":
4087         entry = self.cfg.GetMasterNode()
4088       elif field == "drain_flag":
4089         entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4090       elif field == "watcher_pause":
4091         entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4092       elif field == "volume_group_name":
4093         entry = self.cfg.GetVGName()
4094       else:
4095         raise errors.ParameterError(field)
4096       values.append(entry)
4097     return values
4098
4099
4100 class LUActivateInstanceDisks(NoHooksLU):
4101   """Bring up an instance's disks.
4102
4103   """
4104   _OP_PARAMS = [
4105     _PInstanceName,
4106     ("ignore_size", False, ht.TBool),
4107     ]
4108   REQ_BGL = False
4109
4110   def ExpandNames(self):
4111     self._ExpandAndLockInstance()
4112     self.needed_locks[locking.LEVEL_NODE] = []
4113     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4114
4115   def DeclareLocks(self, level):
4116     if level == locking.LEVEL_NODE:
4117       self._LockInstancesNodes()
4118
4119   def CheckPrereq(self):
4120     """Check prerequisites.
4121
4122     This checks that the instance is in the cluster.
4123
4124     """
4125     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4126     assert self.instance is not None, \
4127       "Cannot retrieve locked instance %s" % self.op.instance_name
4128     _CheckNodeOnline(self, self.instance.primary_node)
4129
4130   def Exec(self, feedback_fn):
4131     """Activate the disks.
4132
4133     """
4134     disks_ok, disks_info = \
4135               _AssembleInstanceDisks(self, self.instance,
4136                                      ignore_size=self.op.ignore_size)
4137     if not disks_ok:
4138       raise errors.OpExecError("Cannot activate block devices")
4139
4140     return disks_info
4141
4142
4143 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4144                            ignore_size=False):
4145   """Prepare the block devices for an instance.
4146
4147   This sets up the block devices on all nodes.
4148
4149   @type lu: L{LogicalUnit}
4150   @param lu: the logical unit on whose behalf we execute
4151   @type instance: L{objects.Instance}
4152   @param instance: the instance for whose disks we assemble
4153   @type disks: list of L{objects.Disk} or None
4154   @param disks: which disks to assemble (or all, if None)
4155   @type ignore_secondaries: boolean
4156   @param ignore_secondaries: if true, errors on secondary nodes
4157       won't result in an error return from the function
4158   @type ignore_size: boolean
4159   @param ignore_size: if true, the current known size of the disk
4160       will not be used during the disk activation, useful for cases
4161       when the size is wrong
4162   @return: False if the operation failed, otherwise a list of
4163       (host, instance_visible_name, node_visible_name)
4164       with the mapping from node devices to instance devices
4165
4166   """
4167   device_info = []
4168   disks_ok = True
4169   iname = instance.name
4170   disks = _ExpandCheckDisks(instance, disks)
4171
4172   # With the two passes mechanism we try to reduce the window of
4173   # opportunity for the race condition of switching DRBD to primary
4174   # before handshaking occured, but we do not eliminate it
4175
4176   # The proper fix would be to wait (with some limits) until the
4177   # connection has been made and drbd transitions from WFConnection
4178   # into any other network-connected state (Connected, SyncTarget,
4179   # SyncSource, etc.)
4180
4181   # 1st pass, assemble on all nodes in secondary mode
4182   for inst_disk in disks:
4183     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4184       if ignore_size:
4185         node_disk = node_disk.Copy()
4186         node_disk.UnsetSize()
4187       lu.cfg.SetDiskID(node_disk, node)
4188       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4189       msg = result.fail_msg
4190       if msg:
4191         lu.proc.LogWarning("Could not prepare block device %s on node %s"
4192                            " (is_primary=False, pass=1): %s",
4193                            inst_disk.iv_name, node, msg)
4194         if not ignore_secondaries:
4195           disks_ok = False
4196
4197   # FIXME: race condition on drbd migration to primary
4198
4199   # 2nd pass, do only the primary node
4200   for inst_disk in disks:
4201     dev_path = None
4202
4203     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4204       if node != instance.primary_node:
4205         continue
4206       if ignore_size:
4207         node_disk = node_disk.Copy()
4208         node_disk.UnsetSize()
4209       lu.cfg.SetDiskID(node_disk, node)
4210       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4211       msg = result.fail_msg
4212       if msg:
4213         lu.proc.LogWarning("Could not prepare block device %s on node %s"
4214                            " (is_primary=True, pass=2): %s",
4215                            inst_disk.iv_name, node, msg)
4216         disks_ok = False
4217       else:
4218         dev_path = result.payload
4219
4220     device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4221
4222   # leave the disks configured for the primary node
4223   # this is a workaround that would be fixed better by
4224   # improving the logical/physical id handling
4225   for disk in disks:
4226     lu.cfg.SetDiskID(disk, instance.primary_node)
4227
4228   return disks_ok, device_info
4229
4230
4231 def _StartInstanceDisks(lu, instance, force):
4232   """Start the disks of an instance.
4233
4234   """
4235   disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4236                                            ignore_secondaries=force)
4237   if not disks_ok:
4238     _ShutdownInstanceDisks(lu, instance)
4239     if force is not None and not force:
4240       lu.proc.LogWarning("", hint="If the message above refers to a"
4241                          " secondary node,"
4242                          " you can retry the operation using '--force'.")
4243     raise errors.OpExecError("Disk consistency error")
4244
4245
4246 class LUDeactivateInstanceDisks(NoHooksLU):
4247   """Shutdown an instance's disks.
4248
4249   """
4250   _OP_PARAMS = [
4251     _PInstanceName,
4252     ]
4253   REQ_BGL = False
4254
4255   def ExpandNames(self):
4256     self._ExpandAndLockInstance()
4257     self.needed_locks[locking.LEVEL_NODE] = []
4258     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4259
4260   def DeclareLocks(self, level):
4261     if level == locking.LEVEL_NODE:
4262       self._LockInstancesNodes()
4263
4264   def CheckPrereq(self):
4265     """Check prerequisites.
4266
4267     This checks that the instance is in the cluster.
4268
4269     """
4270     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4271     assert self.instance is not None, \
4272       "Cannot retrieve locked instance %s" % self.op.instance_name
4273
4274   def Exec(self, feedback_fn):
4275     """Deactivate the disks
4276
4277     """
4278     instance = self.instance
4279     _SafeShutdownInstanceDisks(self, instance)
4280
4281
4282 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4283   """Shutdown block devices of an instance.
4284
4285   This function checks if an instance is running, before calling
4286   _ShutdownInstanceDisks.
4287
4288   """
4289   _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4290   _ShutdownInstanceDisks(lu, instance, disks=disks)
4291
4292
4293 def _ExpandCheckDisks(instance, disks):
4294   """Return the instance disks selected by the disks list
4295
4296   @type disks: list of L{objects.Disk} or None
4297   @param disks: selected disks
4298   @rtype: list of L{objects.Disk}
4299   @return: selected instance disks to act on
4300
4301   """
4302   if disks is None:
4303     return instance.disks
4304   else:
4305     if not set(disks).issubset(instance.disks):
4306       raise errors.ProgrammerError("Can only act on disks belonging to the"
4307                                    " target instance")
4308     return disks
4309
4310
4311 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4312   """Shutdown block devices of an instance.
4313
4314   This does the shutdown on all nodes of the instance.
4315
4316   If the ignore_primary is false, errors on the primary node are
4317   ignored.
4318
4319   """
4320   all_result = True
4321   disks = _ExpandCheckDisks(instance, disks)
4322
4323   for disk in disks:
4324     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4325       lu.cfg.SetDiskID(top_disk, node)
4326       result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4327       msg = result.fail_msg
4328       if msg:
4329         lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4330                       disk.iv_name, node, msg)
4331         if not ignore_primary or node != instance.primary_node:
4332           all_result = False
4333   return all_result
4334
4335
4336 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4337   """Checks if a node has enough free memory.
4338
4339   This function check if a given node has the needed amount of free
4340   memory. In case the node has less memory or we cannot get the
4341   information from the node, this function raise an OpPrereqError
4342   exception.
4343
4344   @type lu: C{LogicalUnit}
4345   @param lu: a logical unit from which we get configuration data
4346   @type node: C{str}
4347   @param node: the node to check
4348   @type reason: C{str}
4349   @param reason: string to use in the error message
4350   @type requested: C{int}
4351   @param requested: the amount of memory in MiB to check for
4352   @type hypervisor_name: C{str}
4353   @param hypervisor_name: the hypervisor to ask for memory stats
4354   @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4355       we cannot check the node
4356
4357   """
4358   nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4359   nodeinfo[node].Raise("Can't get data from node %s" % node,
4360                        prereq=True, ecode=errors.ECODE_ENVIRON)
4361   free_mem = nodeinfo[node].payload.get('memory_free', None)
4362   if not isinstance(free_mem, int):
4363     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4364                                " was '%s'" % (node, free_mem),
4365                                errors.ECODE_ENVIRON)
4366   if requested > free_mem:
4367     raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4368                                " needed %s MiB, available %s MiB" %
4369                                (node, reason, requested, free_mem),
4370                                errors.ECODE_NORES)
4371
4372
4373 def _CheckNodesFreeDisk(lu, nodenames, requested):
4374   """Checks if nodes have enough free disk space in the default VG.
4375
4376   This function check if all given nodes have the needed amount of
4377   free disk. In case any node has less disk or we cannot get the
4378   information from the node, this function raise an OpPrereqError
4379   exception.
4380
4381   @type lu: C{LogicalUnit}
4382   @param lu: a logical unit from which we get configuration data
4383   @type nodenames: C{list}
4384   @param nodenames: the list of node names to check
4385   @type requested: C{int}
4386   @param requested: the amount of disk in MiB to check for
4387   @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4388       we cannot check the node
4389
4390   """
4391   nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4392                                    lu.cfg.GetHypervisorType())
4393   for node in nodenames:
4394     info = nodeinfo[node]
4395     info.Raise("Cannot get current information from node %s" % node,
4396                prereq=True, ecode=errors.ECODE_ENVIRON)
4397     vg_free = info.payload.get("vg_free", None)
4398     if not isinstance(vg_free, int):
4399       raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4400                                  " result was '%s'" % (node, vg_free),
4401                                  errors.ECODE_ENVIRON)
4402     if requested > vg_free:
4403       raise errors.OpPrereqError("Not enough disk space on target node %s:"
4404                                  " required %d MiB, available %d MiB" %
4405                                  (node, requested, vg_free),
4406                                  errors.ECODE_NORES)
4407
4408
4409 class LUStartupInstance(LogicalUnit):
4410   """Starts an instance.
4411
4412   """
4413   HPATH = "instance-start"
4414   HTYPE = constants.HTYPE_INSTANCE
4415   _OP_PARAMS = [
4416     _PInstanceName,
4417     _PForce,
4418     _PIgnoreOfflineNodes,
4419     ("hvparams", ht.EmptyDict, ht.TDict),
4420     ("beparams", ht.EmptyDict, ht.TDict),
4421     ]
4422   REQ_BGL = False
4423
4424   def CheckArguments(self):
4425     # extra beparams
4426     if self.op.beparams:
4427       # fill the beparams dict
4428       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4429
4430   def ExpandNames(self):
4431     self._ExpandAndLockInstance()
4432
4433   def BuildHooksEnv(self):
4434     """Build hooks env.
4435
4436     This runs on master, primary and secondary nodes of the instance.
4437
4438     """
4439     env = {
4440       "FORCE": self.op.force,
4441       }
4442     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4443     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4444     return env, nl, nl
4445
4446   def CheckPrereq(self):
4447     """Check prerequisites.
4448
4449     This checks that the instance is in the cluster.
4450
4451     """
4452     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4453     assert self.instance is not None, \
4454       "Cannot retrieve locked instance %s" % self.op.instance_name
4455
4456     # extra hvparams
4457     if self.op.hvparams:
4458       # check hypervisor parameter syntax (locally)
4459       cluster = self.cfg.GetClusterInfo()
4460       utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4461       filled_hvp = cluster.FillHV(instance)
4462       filled_hvp.update(self.op.hvparams)
4463       hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4464       hv_type.CheckParameterSyntax(filled_hvp)
4465       _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4466
4467     self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
4468
4469     if self.primary_offline and self.op.ignore_offline_nodes:
4470       self.proc.LogWarning("Ignoring offline primary node")
4471
4472       if self.op.hvparams or self.op.beparams:
4473         self.proc.LogWarning("Overridden parameters are ignored")
4474     else:
4475       _CheckNodeOnline(self, instance.primary_node)
4476
4477       bep = self.cfg.GetClusterInfo().FillBE(instance)
4478
4479       # check bridges existence
4480       _CheckInstanceBridgesExist(self, instance)
4481
4482       remote_info = self.rpc.call_instance_info(instance.primary_node,
4483                                                 instance.name,
4484                                                 instance.hypervisor)
4485       remote_info.Raise("Error checking node %s" % instance.primary_node,
4486                         prereq=True, ecode=errors.ECODE_ENVIRON)
4487       if not remote_info.payload: # not running already
4488         _CheckNodeFreeMemory(self, instance.primary_node,
4489                              "starting instance %s" % instance.name,
4490                              bep[constants.BE_MEMORY], instance.hypervisor)
4491
4492   def Exec(self, feedback_fn):
4493     """Start the instance.
4494
4495     """
4496     instance = self.instance
4497     force = self.op.force
4498
4499     self.cfg.MarkInstanceUp(instance.name)
4500
4501     if self.primary_offline:
4502       assert self.op.ignore_offline_nodes
4503       self.proc.LogInfo("Primary node offline, marked instance as started")
4504     else:
4505       node_current = instance.primary_node
4506
4507       _StartInstanceDisks(self, instance, force)
4508
4509       result = self.rpc.call_instance_start(node_current, instance,
4510                                             self.op.hvparams, self.op.beparams)
4511       msg = result.fail_msg
4512       if msg:
4513         _ShutdownInstanceDisks(self, instance)
4514         raise errors.OpExecError("Could not start instance: %s" % msg)
4515
4516
4517 class LURebootInstance(LogicalUnit):
4518   """Reboot an instance.
4519
4520   """
4521   HPATH = "instance-reboot"
4522   HTYPE = constants.HTYPE_INSTANCE
4523   _OP_PARAMS = [
4524     _PInstanceName,
4525     ("ignore_secondaries", False, ht.TBool),
4526     ("reboot_type", ht.NoDefault, ht.TElemOf(constants.REBOOT_TYPES)),
4527     _PShutdownTimeout,
4528     ]
4529   REQ_BGL = False
4530
4531   def ExpandNames(self):
4532     self._ExpandAndLockInstance()
4533
4534   def BuildHooksEnv(self):
4535     """Build hooks env.
4536
4537     This runs on master, primary and secondary nodes of the instance.
4538
4539     """
4540     env = {
4541       "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4542       "REBOOT_TYPE": self.op.reboot_type,
4543       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
4544       }
4545     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4546     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4547     return env, nl, nl
4548
4549   def CheckPrereq(self):
4550     """Check prerequisites.
4551
4552     This checks that the instance is in the cluster.
4553
4554     """
4555     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4556     assert self.instance is not None, \
4557       "Cannot retrieve locked instance %s" % self.op.instance_name
4558
4559     _CheckNodeOnline(self, instance.primary_node)
4560
4561     # check bridges existence
4562     _CheckInstanceBridgesExist(self, instance)
4563
4564   def Exec(self, feedback_fn):
4565     """Reboot the instance.
4566
4567     """
4568     instance = self.instance
4569     ignore_secondaries = self.op.ignore_secondaries
4570     reboot_type = self.op.reboot_type
4571
4572     node_current = instance.primary_node
4573
4574     if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4575                        constants.INSTANCE_REBOOT_HARD]:
4576       for disk in instance.disks:
4577         self.cfg.SetDiskID(disk, node_current)
4578       result = self.rpc.call_instance_reboot(node_current, instance,
4579                                              reboot_type,
4580                                              self.op.shutdown_timeout)
4581       result.Raise("Could not reboot instance")
4582     else:
4583       result = self.rpc.call_instance_shutdown(node_current, instance,
4584                                                self.op.shutdown_timeout)
4585       result.Raise("Could not shutdown instance for full reboot")
4586       _ShutdownInstanceDisks(self, instance)
4587       _StartInstanceDisks(self, instance, ignore_secondaries)
4588       result = self.rpc.call_instance_start(node_current, instance, None, None)
4589       msg = result.fail_msg
4590       if msg:
4591         _ShutdownInstanceDisks(self, instance)
4592         raise errors.OpExecError("Could not start instance for"
4593                                  " full reboot: %s" % msg)
4594
4595     self.cfg.MarkInstanceUp(instance.name)
4596
4597
4598 class LUShutdownInstance(LogicalUnit):
4599   """Shutdown an instance.
4600
4601   """
4602   HPATH = "instance-stop"
4603   HTYPE = constants.HTYPE_INSTANCE
4604   _OP_PARAMS = [
4605     _PInstanceName,
4606     _PIgnoreOfflineNodes,
4607     ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, ht.TPositiveInt),
4608     ]
4609   REQ_BGL = False
4610
4611   def ExpandNames(self):
4612     self._ExpandAndLockInstance()
4613
4614   def BuildHooksEnv(self):
4615     """Build hooks env.
4616
4617     This runs on master, primary and secondary nodes of the instance.
4618
4619     """
4620     env = _BuildInstanceHookEnvByObject(self, self.instance)
4621     env["TIMEOUT"] = self.op.timeout
4622     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4623     return env, nl, nl
4624
4625   def CheckPrereq(self):
4626     """Check prerequisites.
4627
4628     This checks that the instance is in the cluster.
4629
4630     """
4631     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4632     assert self.instance is not None, \
4633       "Cannot retrieve locked instance %s" % self.op.instance_name
4634
4635     self.primary_offline = \
4636       self.cfg.GetNodeInfo(self.instance.primary_node).offline
4637
4638     if self.primary_offline and self.op.ignore_offline_nodes:
4639       self.proc.LogWarning("Ignoring offline primary node")
4640     else:
4641       _CheckNodeOnline(self, self.instance.primary_node)
4642
4643   def Exec(self, feedback_fn):
4644     """Shutdown the instance.
4645
4646     """
4647     instance = self.instance
4648     node_current = instance.primary_node
4649     timeout = self.op.timeout
4650
4651     self.cfg.MarkInstanceDown(instance.name)
4652
4653     if self.primary_offline:
4654       assert self.op.ignore_offline_nodes
4655       self.proc.LogInfo("Primary node offline, marked instance as stopped")
4656     else:
4657       result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4658       msg = result.fail_msg
4659       if msg:
4660         self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4661
4662       _ShutdownInstanceDisks(self, instance)
4663
4664
4665 class LUReinstallInstance(LogicalUnit):
4666   """Reinstall an instance.
4667
4668   """
4669   HPATH = "instance-reinstall"
4670   HTYPE = constants.HTYPE_INSTANCE
4671   _OP_PARAMS = [
4672     _PInstanceName,
4673     ("os_type", None, ht.TMaybeString),
4674     ("force_variant", False, ht.TBool),
4675     ("osparams", None, ht.TOr(ht.TDict, ht.TNone)),
4676     ]
4677   REQ_BGL = False
4678
4679   def ExpandNames(self):
4680     self._ExpandAndLockInstance()
4681
4682   def BuildHooksEnv(self):
4683     """Build hooks env.
4684
4685     This runs on master, primary and secondary nodes of the instance.
4686
4687     """
4688     env = _BuildInstanceHookEnvByObject(self, self.instance)
4689     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4690     return env, nl, nl
4691
4692   def CheckPrereq(self):
4693     """Check prerequisites.
4694
4695     This checks that the instance is in the cluster and is not running.
4696
4697     """
4698     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4699     assert instance is not None, \
4700       "Cannot retrieve locked instance %s" % self.op.instance_name
4701     _CheckNodeOnline(self, instance.primary_node)
4702
4703     if instance.disk_template == constants.DT_DISKLESS:
4704       raise errors.OpPrereqError("Instance '%s' has no disks" %
4705                                  self.op.instance_name,
4706                                  errors.ECODE_INVAL)
4707     _CheckInstanceDown(self, instance, "cannot reinstall")
4708
4709     if self.op.os_type is not None:
4710       # OS verification
4711       pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4712       _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4713       instance_os = self.op.os_name
4714     else:
4715       instance_os = instance.os
4716
4717     nodelist = list(instance.all_nodes)
4718
4719     if self.op.osparams:
4720       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
4721       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
4722       self.os_inst = i_osdict # the new dict (without defaults)
4723     else:
4724       self.os_inst = None
4725
4726     self.instance = instance
4727
4728   def Exec(self, feedback_fn):
4729     """Reinstall the instance.
4730
4731     """
4732     inst = self.instance
4733
4734     if self.op.os_type is not None:
4735       feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4736       inst.os = self.op.os_type
4737       # Write to configuration
4738       self.cfg.Update(inst, feedback_fn)
4739
4740     _StartInstanceDisks(self, inst, None)
4741     try:
4742       feedback_fn("Running the instance OS create scripts...")
4743       # FIXME: pass debug option from opcode to backend
4744       result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4745                                              self.op.debug_level,
4746                                              osparams=self.os_inst)
4747       result.Raise("Could not install OS for instance %s on node %s" %
4748                    (inst.name, inst.primary_node))
4749     finally:
4750       _ShutdownInstanceDisks(self, inst)
4751
4752
4753 class LURecreateInstanceDisks(LogicalUnit):
4754   """Recreate an instance's missing disks.
4755
4756   """
4757   HPATH = "instance-recreate-disks"
4758   HTYPE = constants.HTYPE_INSTANCE
4759   _OP_PARAMS = [
4760     _PInstanceName,
4761     ("disks", ht.EmptyList, ht.TListOf(ht.TPositiveInt)),
4762     ]
4763   REQ_BGL = False
4764
4765   def ExpandNames(self):
4766     self._ExpandAndLockInstance()
4767
4768   def BuildHooksEnv(self):
4769     """Build hooks env.
4770
4771     This runs on master, primary and secondary nodes of the instance.
4772
4773     """
4774     env = _BuildInstanceHookEnvByObject(self, self.instance)
4775     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4776     return env, nl, nl
4777
4778   def CheckPrereq(self):
4779     """Check prerequisites.
4780
4781     This checks that the instance is in the cluster and is not running.
4782
4783     """
4784     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4785     assert instance is not None, \
4786       "Cannot retrieve locked instance %s" % self.op.instance_name
4787     _CheckNodeOnline(self, instance.primary_node)
4788
4789     if instance.disk_template == constants.DT_DISKLESS:
4790       raise errors.OpPrereqError("Instance '%s' has no disks" %
4791                                  self.op.instance_name, errors.ECODE_INVAL)
4792     _CheckInstanceDown(self, instance, "cannot recreate disks")
4793
4794     if not self.op.disks:
4795       self.op.disks = range(len(instance.disks))
4796     else:
4797       for idx in self.op.disks:
4798         if idx >= len(instance.disks):
4799           raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4800                                      errors.ECODE_INVAL)
4801
4802     self.instance = instance
4803
4804   def Exec(self, feedback_fn):
4805     """Recreate the disks.
4806
4807     """
4808     to_skip = []
4809     for idx, _ in enumerate(self.instance.disks):
4810       if idx not in self.op.disks: # disk idx has not been passed in
4811         to_skip.append(idx)
4812         continue
4813
4814     _CreateDisks(self, self.instance, to_skip=to_skip)
4815
4816
4817 class LURenameInstance(LogicalUnit):
4818   """Rename an instance.
4819
4820   """
4821   HPATH = "instance-rename"
4822   HTYPE = constants.HTYPE_INSTANCE
4823   _OP_PARAMS = [
4824     _PInstanceName,
4825     ("new_name", ht.NoDefault, ht.TNonEmptyString),
4826     ("ip_check", False, ht.TBool),
4827     ("name_check", True, ht.TBool),
4828     ]
4829
4830   def CheckArguments(self):
4831     """Check arguments.
4832
4833     """
4834     if self.op.ip_check and not self.op.name_check:
4835       # TODO: make the ip check more flexible and not depend on the name check
4836       raise errors.OpPrereqError("Cannot do ip check without a name check",
4837                                  errors.ECODE_INVAL)
4838
4839   def BuildHooksEnv(self):
4840     """Build hooks env.
4841
4842     This runs on master, primary and secondary nodes of the instance.
4843
4844     """
4845     env = _BuildInstanceHookEnvByObject(self, self.instance)
4846     env["INSTANCE_NEW_NAME"] = self.op.new_name
4847     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4848     return env, nl, nl
4849
4850   def CheckPrereq(self):
4851     """Check prerequisites.
4852
4853     This checks that the instance is in the cluster and is not running.
4854
4855     """
4856     self.op.instance_name = _ExpandInstanceName(self.cfg,
4857                                                 self.op.instance_name)
4858     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4859     assert instance is not None
4860     _CheckNodeOnline(self, instance.primary_node)
4861     _CheckInstanceDown(self, instance, "cannot rename")
4862     self.instance = instance
4863
4864     new_name = self.op.new_name
4865     if self.op.name_check:
4866       hostname = netutils.GetHostname(name=new_name)
4867       new_name = self.op.new_name = hostname.name
4868       if (self.op.ip_check and
4869           netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
4870         raise errors.OpPrereqError("IP %s of instance %s already in use" %
4871                                    (hostname.ip, new_name),
4872                                    errors.ECODE_NOTUNIQUE)
4873
4874     instance_list = self.cfg.GetInstanceList()
4875     if new_name in instance_list:
4876       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4877                                  new_name, errors.ECODE_EXISTS)
4878
4879   def Exec(self, feedback_fn):
4880     """Reinstall the instance.
4881
4882     """
4883     inst = self.instance
4884     old_name = inst.name
4885
4886     if inst.disk_template == constants.DT_FILE:
4887       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4888
4889     self.cfg.RenameInstance(inst.name, self.op.new_name)
4890     # Change the instance lock. This is definitely safe while we hold the BGL
4891     self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4892     self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4893
4894     # re-read the instance from the configuration after rename
4895     inst = self.cfg.GetInstanceInfo(self.op.new_name)
4896
4897     if inst.disk_template == constants.DT_FILE:
4898       new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4899       result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4900                                                      old_file_storage_dir,
4901                                                      new_file_storage_dir)
4902       result.Raise("Could not rename on node %s directory '%s' to '%s'"
4903                    " (but the instance has been renamed in Ganeti)" %
4904                    (inst.primary_node, old_file_storage_dir,
4905                     new_file_storage_dir))
4906
4907     _StartInstanceDisks(self, inst, None)
4908     try:
4909       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4910                                                  old_name, self.op.debug_level)
4911       msg = result.fail_msg
4912       if msg:
4913         msg = ("Could not run OS rename script for instance %s on node %s"
4914                " (but the instance has been renamed in Ganeti): %s" %
4915                (inst.name, inst.primary_node, msg))
4916         self.proc.LogWarning(msg)
4917     finally:
4918       _ShutdownInstanceDisks(self, inst)
4919
4920     return inst.name
4921
4922
4923 class LURemoveInstance(LogicalUnit):
4924   """Remove an instance.
4925
4926   """
4927   HPATH = "instance-remove"
4928   HTYPE = constants.HTYPE_INSTANCE
4929   _OP_PARAMS = [
4930     _PInstanceName,
4931     ("ignore_failures", False, ht.TBool),
4932     _PShutdownTimeout,
4933     ]
4934   REQ_BGL = False
4935
4936   def ExpandNames(self):
4937     self._ExpandAndLockInstance()
4938     self.needed_locks[locking.LEVEL_NODE] = []
4939     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4940
4941   def DeclareLocks(self, level):
4942     if level == locking.LEVEL_NODE:
4943       self._LockInstancesNodes()
4944
4945   def BuildHooksEnv(self):
4946     """Build hooks env.
4947
4948     This runs on master, primary and secondary nodes of the instance.
4949
4950     """
4951     env = _BuildInstanceHookEnvByObject(self, self.instance)
4952     env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
4953     nl = [self.cfg.GetMasterNode()]
4954     nl_post = list(self.instance.all_nodes) + nl
4955     return env, nl, nl_post
4956
4957   def CheckPrereq(self):
4958     """Check prerequisites.
4959
4960     This checks that the instance is in the cluster.
4961
4962     """
4963     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4964     assert self.instance is not None, \
4965       "Cannot retrieve locked instance %s" % self.op.instance_name
4966
4967   def Exec(self, feedback_fn):
4968     """Remove the instance.
4969
4970     """
4971     instance = self.instance
4972     logging.info("Shutting down instance %s on node %s",
4973                  instance.name, instance.primary_node)
4974
4975     result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4976                                              self.op.shutdown_timeout)
4977     msg = result.fail_msg
4978     if msg:
4979       if self.op.ignore_failures:
4980         feedback_fn("Warning: can't shutdown instance: %s" % msg)
4981       else:
4982         raise errors.OpExecError("Could not shutdown instance %s on"
4983                                  " node %s: %s" %
4984                                  (instance.name, instance.primary_node, msg))
4985
4986     _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
4987
4988
4989 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
4990   """Utility function to remove an instance.
4991
4992   """
4993   logging.info("Removing block devices for instance %s", instance.name)
4994
4995   if not _RemoveDisks(lu, instance):
4996     if not ignore_failures:
4997       raise errors.OpExecError("Can't remove instance's disks")
4998     feedback_fn("Warning: can't remove instance's disks")
4999
5000   logging.info("Removing instance %s out of cluster config", instance.name)
5001
5002   lu.cfg.RemoveInstance(instance.name)
5003
5004   assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5005     "Instance lock removal conflict"
5006
5007   # Remove lock for the instance
5008   lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5009
5010
5011 class LUQueryInstances(NoHooksLU):
5012   """Logical unit for querying instances.
5013
5014   """
5015   # pylint: disable-msg=W0142
5016   _OP_PARAMS = [
5017     ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
5018     ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
5019     ("use_locking", False, ht.TBool),
5020     ]
5021   REQ_BGL = False
5022   _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
5023                     "serial_no", "ctime", "mtime", "uuid"]
5024   _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
5025                                     "admin_state",
5026                                     "disk_template", "ip", "mac", "bridge",
5027                                     "nic_mode", "nic_link",
5028                                     "sda_size", "sdb_size", "vcpus", "tags",
5029                                     "network_port", "beparams",
5030                                     r"(disk)\.(size)/([0-9]+)",
5031                                     r"(disk)\.(sizes)", "disk_usage",
5032                                     r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
5033                                     r"(nic)\.(bridge)/([0-9]+)",
5034                                     r"(nic)\.(macs|ips|modes|links|bridges)",
5035                                     r"(disk|nic)\.(count)",
5036                                     "hvparams", "custom_hvparams",
5037                                     "custom_beparams", "custom_nicparams",
5038                                     ] + _SIMPLE_FIELDS +
5039                                   ["hv/%s" % name
5040                                    for name in constants.HVS_PARAMETERS
5041                                    if name not in constants.HVC_GLOBALS] +
5042                                   ["be/%s" % name
5043                                    for name in constants.BES_PARAMETERS])
5044   _FIELDS_DYNAMIC = utils.FieldSet("oper_state",
5045                                    "oper_ram",
5046                                    "oper_vcpus",
5047                                    "status")
5048
5049
5050   def CheckArguments(self):
5051     _CheckOutputFields(static=self._FIELDS_STATIC,
5052                        dynamic=self._FIELDS_DYNAMIC,
5053                        selected=self.op.output_fields)
5054
5055   def ExpandNames(self):
5056     self.needed_locks = {}
5057     self.share_locks[locking.LEVEL_INSTANCE] = 1
5058     self.share_locks[locking.LEVEL_NODE] = 1
5059
5060     if self.op.names:
5061       self.wanted = _GetWantedInstances(self, self.op.names)
5062     else:
5063       self.wanted = locking.ALL_SET
5064
5065     self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
5066     self.do_locking = self.do_node_query and self.op.use_locking
5067     if self.do_locking:
5068       self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5069       self.needed_locks[locking.LEVEL_NODE] = []
5070       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5071
5072   def DeclareLocks(self, level):
5073     if level == locking.LEVEL_NODE and self.do_locking:
5074       self._LockInstancesNodes()
5075
5076   def Exec(self, feedback_fn):
5077     """Computes the list of nodes and their attributes.
5078
5079     """
5080     # pylint: disable-msg=R0912
5081     # way too many branches here
5082     all_info = self.cfg.GetAllInstancesInfo()
5083     if self.wanted == locking.ALL_SET:
5084       # caller didn't specify instance names, so ordering is not important
5085       if self.do_locking:
5086         instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
5087       else:
5088         instance_names = all_info.keys()
5089       instance_names = utils.NiceSort(instance_names)
5090     else:
5091       # caller did specify names, so we must keep the ordering
5092       if self.do_locking:
5093         tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
5094       else:
5095         tgt_set = all_info.keys()
5096       missing = set(self.wanted).difference(tgt_set)
5097       if missing:
5098         raise errors.OpExecError("Some instances were removed before"
5099                                  " retrieving their data: %s" % missing)
5100       instance_names = self.wanted
5101
5102     instance_list = [all_info[iname] for iname in instance_names]
5103
5104     # begin data gathering
5105
5106     nodes = frozenset([inst.primary_node for inst in instance_list])
5107     hv_list = list(set([inst.hypervisor for inst in instance_list]))
5108
5109     bad_nodes = []
5110     off_nodes = []
5111     if self.do_node_query:
5112       live_data = {}
5113       node_data = self.rpc.call_all_instances_info(nodes, hv_list)
5114       for name in nodes:
5115         result = node_data[name]
5116         if result.offline:
5117           # offline nodes will be in both lists
5118           off_nodes.append(name)
5119         if result.fail_msg:
5120           bad_nodes.append(name)
5121         else:
5122           if result.payload:
5123             live_data.update(result.payload)
5124           # else no instance is alive
5125     else:
5126       live_data = dict([(name, {}) for name in instance_names])
5127
5128     # end data gathering
5129
5130     HVPREFIX = "hv/"
5131     BEPREFIX = "be/"
5132     output = []
5133     cluster = self.cfg.GetClusterInfo()
5134     for instance in instance_list:
5135       iout = []
5136       i_hv = cluster.FillHV(instance, skip_globals=True)
5137       i_be = cluster.FillBE(instance)
5138       i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
5139       for field in self.op.output_fields:
5140         st_match = self._FIELDS_STATIC.Matches(field)
5141         if field in self._SIMPLE_FIELDS:
5142           val = getattr(instance, field)
5143         elif field == "pnode":
5144           val = instance.primary_node
5145         elif field == "snodes":
5146           val = list(instance.secondary_nodes)
5147         elif field == "admin_state":
5148           val = instance.admin_up
5149         elif field == "oper_state":
5150           if instance.primary_node in bad_nodes:
5151             val = None
5152           else:
5153             val = bool(live_data.get(instance.name))
5154         elif field == "status":
5155           if instance.primary_node in off_nodes:
5156             val = "ERROR_nodeoffline"
5157           elif instance.primary_node in bad_nodes:
5158             val = "ERROR_nodedown"
5159           else:
5160             running = bool(live_data.get(instance.name))
5161             if running:
5162               if instance.admin_up:
5163                 val = "running"
5164               else:
5165                 val = "ERROR_up"
5166             else:
5167               if instance.admin_up:
5168                 val = "ERROR_down"
5169               else:
5170                 val = "ADMIN_down"
5171         elif field == "oper_ram":
5172           if instance.primary_node in bad_nodes:
5173             val = None
5174           elif instance.name in live_data:
5175             val = live_data[instance.name].get("memory", "?")
5176           else:
5177             val = "-"
5178         elif field == "oper_vcpus":
5179           if instance.primary_node in bad_nodes:
5180             val = None
5181           elif instance.name in live_data:
5182             val = live_data[instance.name].get("vcpus", "?")
5183           else:
5184             val = "-"
5185         elif field == "vcpus":
5186           val = i_be[constants.BE_VCPUS]
5187         elif field == "disk_template":
5188           val = instance.disk_template
5189         elif field == "ip":
5190           if instance.nics:
5191             val = instance.nics[0].ip
5192           else:
5193             val = None
5194         elif field == "nic_mode":
5195           if instance.nics:
5196             val = i_nicp[0][constants.NIC_MODE]
5197           else:
5198             val = None
5199         elif field == "nic_link":
5200           if instance.nics:
5201             val = i_nicp[0][constants.NIC_LINK]
5202           else:
5203             val = None
5204         elif field == "bridge":
5205           if (instance.nics and
5206               i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
5207             val = i_nicp[0][constants.NIC_LINK]
5208           else:
5209             val = None
5210         elif field == "mac":
5211           if instance.nics:
5212             val = instance.nics[0].mac
5213           else:
5214             val = None
5215         elif field == "custom_nicparams":
5216           val = [nic.nicparams for nic in instance.nics]
5217         elif field == "sda_size" or field == "sdb_size":
5218           idx = ord(field[2]) - ord('a')
5219           try:
5220             val = instance.FindDisk(idx).size
5221           except errors.OpPrereqError:
5222             val = None
5223         elif field == "disk_usage": # total disk usage per node
5224           disk_sizes = [{'size': disk.size} for disk in instance.disks]
5225           val = _ComputeDiskSize(instance.disk_template, disk_sizes)
5226         elif field == "tags":
5227           val = list(instance.GetTags())
5228         elif field == "custom_hvparams":
5229           val = instance.hvparams # not filled!
5230         elif field == "hvparams":
5231           val = i_hv
5232         elif (field.startswith(HVPREFIX) and
5233               field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
5234               field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
5235           val = i_hv.get(field[len(HVPREFIX):], None)
5236         elif field == "custom_beparams":
5237           val = instance.beparams
5238         elif field == "beparams":
5239           val = i_be
5240         elif (field.startswith(BEPREFIX) and
5241               field[len(BEPREFIX):] in constants.BES_PARAMETERS):
5242           val = i_be.get(field[len(BEPREFIX):], None)
5243         elif st_match and st_match.groups():
5244           # matches a variable list
5245           st_groups = st_match.groups()
5246           if st_groups and st_groups[0] == "disk":
5247             if st_groups[1] == "count":
5248               val = len(instance.disks)
5249             elif st_groups[1] == "sizes":
5250               val = [disk.size for disk in instance.disks]
5251             elif st_groups[1] == "size":
5252               try:
5253                 val = instance.FindDisk(st_groups[2]).size
5254               except errors.OpPrereqError:
5255                 val = None
5256             else:
5257               assert False, "Unhandled disk parameter"
5258           elif st_groups[0] == "nic":
5259             if st_groups[1] == "count":
5260               val = len(instance.nics)
5261             elif st_groups[1] == "macs":
5262               val = [nic.mac for nic in instance.nics]
5263             elif st_groups[1] == "ips":
5264               val = [nic.ip for nic in instance.nics]
5265             elif st_groups[1] == "modes":
5266               val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
5267             elif st_groups[1] == "links":
5268               val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
5269             elif st_groups[1] == "bridges":
5270               val = []
5271               for nicp in i_nicp:
5272                 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
5273                   val.append(nicp[constants.NIC_LINK])
5274                 else:
5275                   val.append(None)
5276             else:
5277               # index-based item
5278               nic_idx = int(st_groups[2])
5279               if nic_idx >= len(instance.nics):
5280                 val = None
5281               else:
5282                 if st_groups[1] == "mac":
5283                   val = instance.nics[nic_idx].mac
5284                 elif st_groups[1] == "ip":
5285                   val = instance.nics[nic_idx].ip
5286                 elif st_groups[1] == "mode":
5287                   val = i_nicp[nic_idx][constants.NIC_MODE]
5288                 elif st_groups[1] == "link":
5289                   val = i_nicp[nic_idx][constants.NIC_LINK]
5290                 elif st_groups[1] == "bridge":
5291                   nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
5292                   if nic_mode == constants.NIC_MODE_BRIDGED:
5293                     val = i_nicp[nic_idx][constants.NIC_LINK]
5294                   else:
5295                     val = None
5296                 else:
5297                   assert False, "Unhandled NIC parameter"
5298           else:
5299             assert False, ("Declared but unhandled variable parameter '%s'" %
5300                            field)
5301         else:
5302           assert False, "Declared but unhandled parameter '%s'" % field
5303         iout.append(val)
5304       output.append(iout)
5305
5306     return output
5307
5308
5309 class LUFailoverInstance(LogicalUnit):
5310   """Failover an instance.
5311
5312   """
5313   HPATH = "instance-failover"
5314   HTYPE = constants.HTYPE_INSTANCE
5315   _OP_PARAMS = [
5316     _PInstanceName,
5317     ("ignore_consistency", False, ht.TBool),
5318     _PShutdownTimeout,
5319     ]
5320   REQ_BGL = False
5321
5322   def ExpandNames(self):
5323     self._ExpandAndLockInstance()
5324     self.needed_locks[locking.LEVEL_NODE] = []
5325     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5326
5327   def DeclareLocks(self, level):
5328     if level == locking.LEVEL_NODE:
5329       self._LockInstancesNodes()
5330
5331   def BuildHooksEnv(self):
5332     """Build hooks env.
5333
5334     This runs on master, primary and secondary nodes of the instance.
5335
5336     """
5337     instance = self.instance
5338     source_node = instance.primary_node
5339     target_node = instance.secondary_nodes[0]
5340     env = {
5341       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5342       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5343       "OLD_PRIMARY": source_node,
5344       "OLD_SECONDARY": target_node,
5345       "NEW_PRIMARY": target_node,
5346       "NEW_SECONDARY": source_node,
5347       }
5348     env.update(_BuildInstanceHookEnvByObject(self, instance))
5349     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5350     nl_post = list(nl)
5351     nl_post.append(source_node)
5352     return env, nl, nl_post
5353
5354   def CheckPrereq(self):
5355     """Check prerequisites.
5356
5357     This checks that the instance is in the cluster.
5358
5359     """
5360     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5361     assert self.instance is not None, \
5362       "Cannot retrieve locked instance %s" % self.op.instance_name
5363
5364     bep = self.cfg.GetClusterInfo().FillBE(instance)
5365     if instance.disk_template not in constants.DTS_NET_MIRROR:
5366       raise errors.OpPrereqError("Instance's disk layout is not"
5367                                  " network mirrored, cannot failover.",
5368                                  errors.ECODE_STATE)
5369
5370     secondary_nodes = instance.secondary_nodes
5371     if not secondary_nodes:
5372       raise errors.ProgrammerError("no secondary node but using "
5373                                    "a mirrored disk template")
5374
5375     target_node = secondary_nodes[0]
5376     _CheckNodeOnline(self, target_node)
5377     _CheckNodeNotDrained(self, target_node)
5378     if instance.admin_up:
5379       # check memory requirements on the secondary node
5380       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5381                            instance.name, bep[constants.BE_MEMORY],
5382                            instance.hypervisor)
5383     else:
5384       self.LogInfo("Not checking memory on the secondary node as"
5385                    " instance will not be started")
5386
5387     # check bridge existance
5388     _CheckInstanceBridgesExist(self, instance, node=target_node)
5389
5390   def Exec(self, feedback_fn):
5391     """Failover an instance.
5392
5393     The failover is done by shutting it down on its present node and
5394     starting it on the secondary.
5395
5396     """
5397     instance = self.instance
5398     primary_node = self.cfg.GetNodeInfo(instance.primary_node)
5399
5400     source_node = instance.primary_node
5401     target_node = instance.secondary_nodes[0]
5402
5403     if instance.admin_up:
5404       feedback_fn("* checking disk consistency between source and target")
5405       for dev in instance.disks:
5406         # for drbd, these are drbd over lvm
5407         if not _CheckDiskConsistency(self, dev, target_node, False):
5408           if not self.op.ignore_consistency:
5409             raise errors.OpExecError("Disk %s is degraded on target node,"
5410                                      " aborting failover." % dev.iv_name)
5411     else:
5412       feedback_fn("* not checking disk consistency as instance is not running")
5413
5414     feedback_fn("* shutting down instance on source node")
5415     logging.info("Shutting down instance %s on node %s",
5416                  instance.name, source_node)
5417
5418     result = self.rpc.call_instance_shutdown(source_node, instance,
5419                                              self.op.shutdown_timeout)
5420     msg = result.fail_msg
5421     if msg:
5422       if self.op.ignore_consistency or primary_node.offline:
5423         self.proc.LogWarning("Could not shutdown instance %s on node %s."
5424                              " Proceeding anyway. Please make sure node"
5425                              " %s is down. Error details: %s",
5426                              instance.name, source_node, source_node, msg)
5427       else:
5428         raise errors.OpExecError("Could not shutdown instance %s on"
5429                                  " node %s: %s" %
5430                                  (instance.name, source_node, msg))
5431
5432     feedback_fn("* deactivating the instance's disks on source node")
5433     if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5434       raise errors.OpExecError("Can't shut down the instance's disks.")
5435
5436     instance.primary_node = target_node
5437     # distribute new instance config to the other nodes
5438     self.cfg.Update(instance, feedback_fn)
5439
5440     # Only start the instance if it's marked as up
5441     if instance.admin_up:
5442       feedback_fn("* activating the instance's disks on target node")
5443       logging.info("Starting instance %s on node %s",
5444                    instance.name, target_node)
5445
5446       disks_ok, _ = _AssembleInstanceDisks(self, instance,
5447                                            ignore_secondaries=True)
5448       if not disks_ok:
5449         _ShutdownInstanceDisks(self, instance)
5450         raise errors.OpExecError("Can't activate the instance's disks")
5451
5452       feedback_fn("* starting the instance on the target node")
5453       result = self.rpc.call_instance_start(target_node, instance, None, None)
5454       msg = result.fail_msg
5455       if msg:
5456         _ShutdownInstanceDisks(self, instance)
5457         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5458                                  (instance.name, target_node, msg))
5459
5460
5461 class LUMigrateInstance(LogicalUnit):
5462   """Migrate an instance.
5463
5464   This is migration without shutting down, compared to the failover,
5465   which is done with shutdown.
5466
5467   """
5468   HPATH = "instance-migrate"
5469   HTYPE = constants.HTYPE_INSTANCE
5470   _OP_PARAMS = [
5471     _PInstanceName,
5472     _PMigrationMode,
5473     _PMigrationLive,
5474     ("cleanup", False, ht.TBool),
5475     ]
5476
5477   REQ_BGL = False
5478
5479   def ExpandNames(self):
5480     self._ExpandAndLockInstance()
5481
5482     self.needed_locks[locking.LEVEL_NODE] = []
5483     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5484
5485     self._migrater = TLMigrateInstance(self, self.op.instance_name,
5486                                        self.op.cleanup)
5487     self.tasklets = [self._migrater]
5488
5489   def DeclareLocks(self, level):
5490     if level == locking.LEVEL_NODE:
5491       self._LockInstancesNodes()
5492
5493   def BuildHooksEnv(self):
5494     """Build hooks env.
5495
5496     This runs on master, primary and secondary nodes of the instance.
5497
5498     """
5499     instance = self._migrater.instance
5500     source_node = instance.primary_node
5501     target_node = instance.secondary_nodes[0]
5502     env = _BuildInstanceHookEnvByObject(self, instance)
5503     env["MIGRATE_LIVE"] = self._migrater.live
5504     env["MIGRATE_CLEANUP"] = self.op.cleanup
5505     env.update({
5506         "OLD_PRIMARY": source_node,
5507         "OLD_SECONDARY": target_node,
5508         "NEW_PRIMARY": target_node,
5509         "NEW_SECONDARY": source_node,
5510         })
5511     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5512     nl_post = list(nl)
5513     nl_post.append(source_node)
5514     return env, nl, nl_post
5515
5516
5517 class LUMoveInstance(LogicalUnit):
5518   """Move an instance by data-copying.
5519
5520   """
5521   HPATH = "instance-move"
5522   HTYPE = constants.HTYPE_INSTANCE
5523   _OP_PARAMS = [
5524     _PInstanceName,
5525     ("target_node", ht.NoDefault, ht.TNonEmptyString),
5526     _PShutdownTimeout,
5527     ]
5528   REQ_BGL = False
5529
5530   def ExpandNames(self):
5531     self._ExpandAndLockInstance()
5532     target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5533     self.op.target_node = target_node
5534     self.needed_locks[locking.LEVEL_NODE] = [target_node]
5535     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5536
5537   def DeclareLocks(self, level):
5538     if level == locking.LEVEL_NODE:
5539       self._LockInstancesNodes(primary_only=True)
5540
5541   def BuildHooksEnv(self):
5542     """Build hooks env.
5543
5544     This runs on master, primary and secondary nodes of the instance.
5545
5546     """
5547     env = {
5548       "TARGET_NODE": self.op.target_node,
5549       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5550       }
5551     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5552     nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5553                                        self.op.target_node]
5554     return env, nl, nl
5555
5556   def CheckPrereq(self):
5557     """Check prerequisites.
5558
5559     This checks that the instance is in the cluster.
5560
5561     """
5562     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5563     assert self.instance is not None, \
5564       "Cannot retrieve locked instance %s" % self.op.instance_name
5565
5566     node = self.cfg.GetNodeInfo(self.op.target_node)
5567     assert node is not None, \
5568       "Cannot retrieve locked node %s" % self.op.target_node
5569
5570     self.target_node = target_node = node.name
5571
5572     if target_node == instance.primary_node:
5573       raise errors.OpPrereqError("Instance %s is already on the node %s" %
5574                                  (instance.name, target_node),
5575                                  errors.ECODE_STATE)
5576
5577     bep = self.cfg.GetClusterInfo().FillBE(instance)
5578
5579     for idx, dsk in enumerate(instance.disks):
5580       if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5581         raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5582                                    " cannot copy" % idx, errors.ECODE_STATE)
5583
5584     _CheckNodeOnline(self, target_node)
5585     _CheckNodeNotDrained(self, target_node)
5586
5587     if instance.admin_up:
5588       # check memory requirements on the secondary node
5589       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5590                            instance.name, bep[constants.BE_MEMORY],
5591                            instance.hypervisor)
5592     else:
5593       self.LogInfo("Not checking memory on the secondary node as"
5594                    " instance will not be started")
5595
5596     # check bridge existance
5597     _CheckInstanceBridgesExist(self, instance, node=target_node)
5598
5599   def Exec(self, feedback_fn):
5600     """Move an instance.
5601
5602     The move is done by shutting it down on its present node, copying
5603     the data over (slow) and starting it on the new node.
5604
5605     """
5606     instance = self.instance
5607
5608     source_node = instance.primary_node
5609     target_node = self.target_node
5610
5611     self.LogInfo("Shutting down instance %s on source node %s",
5612                  instance.name, source_node)
5613
5614     result = self.rpc.call_instance_shutdown(source_node, instance,
5615                                              self.op.shutdown_timeout)
5616     msg = result.fail_msg
5617     if msg:
5618       if self.op.ignore_consistency:
5619         self.proc.LogWarning("Could not shutdown instance %s on node %s."
5620                              " Proceeding anyway. Please make sure node"
5621                              " %s is down. Error details: %s",
5622                              instance.name, source_node, source_node, msg)
5623       else:
5624         raise errors.OpExecError("Could not shutdown instance %s on"
5625                                  " node %s: %s" %
5626                                  (instance.name, source_node, msg))
5627
5628     # create the target disks
5629     try:
5630       _CreateDisks(self, instance, target_node=target_node)
5631     except errors.OpExecError:
5632       self.LogWarning("Device creation failed, reverting...")
5633       try:
5634         _RemoveDisks(self, instance, target_node=target_node)
5635       finally:
5636         self.cfg.ReleaseDRBDMinors(instance.name)
5637         raise
5638
5639     cluster_name = self.cfg.GetClusterInfo().cluster_name
5640
5641     errs = []
5642     # activate, get path, copy the data over
5643     for idx, disk in enumerate(instance.disks):
5644       self.LogInfo("Copying data for disk %d", idx)
5645       result = self.rpc.call_blockdev_assemble(target_node, disk,
5646                                                instance.name, True)
5647       if result.fail_msg:
5648         self.LogWarning("Can't assemble newly created disk %d: %s",
5649                         idx, result.fail_msg)
5650         errs.append(result.fail_msg)
5651         break
5652       dev_path = result.payload
5653       result = self.rpc.call_blockdev_export(source_node, disk,
5654                                              target_node, dev_path,
5655                                              cluster_name)
5656       if result.fail_msg:
5657         self.LogWarning("Can't copy data over for disk %d: %s",
5658                         idx, result.fail_msg)
5659         errs.append(result.fail_msg)
5660         break
5661
5662     if errs:
5663       self.LogWarning("Some disks failed to copy, aborting")
5664       try:
5665         _RemoveDisks(self, instance, target_node=target_node)
5666       finally:
5667         self.cfg.ReleaseDRBDMinors(instance.name)
5668         raise errors.OpExecError("Errors during disk copy: %s" %
5669                                  (",".join(errs),))
5670
5671     instance.primary_node = target_node
5672     self.cfg.Update(instance, feedback_fn)
5673
5674     self.LogInfo("Removing the disks on the original node")
5675     _RemoveDisks(self, instance, target_node=source_node)
5676
5677     # Only start the instance if it's marked as up
5678     if instance.admin_up:
5679       self.LogInfo("Starting instance %s on node %s",
5680                    instance.name, target_node)
5681
5682       disks_ok, _ = _AssembleInstanceDisks(self, instance,
5683                                            ignore_secondaries=True)
5684       if not disks_ok:
5685         _ShutdownInstanceDisks(self, instance)
5686         raise errors.OpExecError("Can't activate the instance's disks")
5687
5688       result = self.rpc.call_instance_start(target_node, instance, None, None)
5689       msg = result.fail_msg
5690       if msg:
5691         _ShutdownInstanceDisks(self, instance)
5692         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5693                                  (instance.name, target_node, msg))
5694
5695
5696 class LUMigrateNode(LogicalUnit):
5697   """Migrate all instances from a node.
5698
5699   """
5700   HPATH = "node-migrate"
5701   HTYPE = constants.HTYPE_NODE
5702   _OP_PARAMS = [
5703     _PNodeName,
5704     _PMigrationMode,
5705     _PMigrationLive,
5706     ]
5707   REQ_BGL = False
5708
5709   def ExpandNames(self):
5710     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5711
5712     self.needed_locks = {
5713       locking.LEVEL_NODE: [self.op.node_name],
5714       }
5715
5716     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5717
5718     # Create tasklets for migrating instances for all instances on this node
5719     names = []
5720     tasklets = []
5721
5722     for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5723       logging.debug("Migrating instance %s", inst.name)
5724       names.append(inst.name)
5725
5726       tasklets.append(TLMigrateInstance(self, inst.name, False))
5727
5728     self.tasklets = tasklets
5729
5730     # Declare instance locks
5731     self.needed_locks[locking.LEVEL_INSTANCE] = names
5732
5733   def DeclareLocks(self, level):
5734     if level == locking.LEVEL_NODE:
5735       self._LockInstancesNodes()
5736
5737   def BuildHooksEnv(self):
5738     """Build hooks env.
5739
5740     This runs on the master, the primary and all the secondaries.
5741
5742     """
5743     env = {
5744       "NODE_NAME": self.op.node_name,
5745       }
5746
5747     nl = [self.cfg.GetMasterNode()]
5748
5749     return (env, nl, nl)
5750
5751
5752 class TLMigrateInstance(Tasklet):
5753   """Tasklet class for instance migration.
5754
5755   @type live: boolean
5756   @ivar live: whether the migration will be done live or non-live;
5757       this variable is initalized only after CheckPrereq has run
5758
5759   """
5760   def __init__(self, lu, instance_name, cleanup):
5761     """Initializes this class.
5762
5763     """
5764     Tasklet.__init__(self, lu)
5765
5766     # Parameters
5767     self.instance_name = instance_name
5768     self.cleanup = cleanup
5769     self.live = False # will be overridden later
5770
5771   def CheckPrereq(self):
5772     """Check prerequisites.
5773
5774     This checks that the instance is in the cluster.
5775
5776     """
5777     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5778     instance = self.cfg.GetInstanceInfo(instance_name)
5779     assert instance is not None
5780
5781     if instance.disk_template != constants.DT_DRBD8:
5782       raise errors.OpPrereqError("Instance's disk layout is not"
5783                                  " drbd8, cannot migrate.", errors.ECODE_STATE)
5784
5785     secondary_nodes = instance.secondary_nodes
5786     if not secondary_nodes:
5787       raise errors.ConfigurationError("No secondary node but using"
5788                                       " drbd8 disk template")
5789
5790     i_be = self.cfg.GetClusterInfo().FillBE(instance)
5791
5792     target_node = secondary_nodes[0]
5793     # check memory requirements on the secondary node
5794     _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5795                          instance.name, i_be[constants.BE_MEMORY],
5796                          instance.hypervisor)
5797
5798     # check bridge existance
5799     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5800
5801     if not self.cleanup:
5802       _CheckNodeNotDrained(self.lu, target_node)
5803       result = self.rpc.call_instance_migratable(instance.primary_node,
5804                                                  instance)
5805       result.Raise("Can't migrate, please use failover",
5806                    prereq=True, ecode=errors.ECODE_STATE)
5807
5808     self.instance = instance
5809
5810     if self.lu.op.live is not None and self.lu.op.mode is not None:
5811       raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
5812                                  " parameters are accepted",
5813                                  errors.ECODE_INVAL)
5814     if self.lu.op.live is not None:
5815       if self.lu.op.live:
5816         self.lu.op.mode = constants.HT_MIGRATION_LIVE
5817       else:
5818         self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
5819       # reset the 'live' parameter to None so that repeated
5820       # invocations of CheckPrereq do not raise an exception
5821       self.lu.op.live = None
5822     elif self.lu.op.mode is None:
5823       # read the default value from the hypervisor
5824       i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
5825       self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
5826
5827     self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
5828
5829   def _WaitUntilSync(self):
5830     """Poll with custom rpc for disk sync.
5831
5832     This uses our own step-based rpc call.
5833
5834     """
5835     self.feedback_fn("* wait until resync is done")
5836     all_done = False
5837     while not all_done:
5838       all_done = True
5839       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5840                                             self.nodes_ip,
5841                                             self.instance.disks)
5842       min_percent = 100
5843       for node, nres in result.items():
5844         nres.Raise("Cannot resync disks on node %s" % node)
5845         node_done, node_percent = nres.payload
5846         all_done = all_done and node_done
5847         if node_percent is not None:
5848           min_percent = min(min_percent, node_percent)
5849       if not all_done:
5850         if min_percent < 100:
5851           self.feedback_fn("   - progress: %.1f%%" % min_percent)
5852         time.sleep(2)
5853
5854   def _EnsureSecondary(self, node):
5855     """Demote a node to secondary.
5856
5857     """
5858     self.feedback_fn("* switching node %s to secondary mode" % node)
5859
5860     for dev in self.instance.disks:
5861       self.cfg.SetDiskID(dev, node)
5862
5863     result = self.rpc.call_blockdev_close(node, self.instance.name,
5864                                           self.instance.disks)
5865     result.Raise("Cannot change disk to secondary on node %s" % node)
5866
5867   def _GoStandalone(self):
5868     """Disconnect from the network.
5869
5870     """
5871     self.feedback_fn("* changing into standalone mode")
5872     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5873                                                self.instance.disks)
5874     for node, nres in result.items():
5875       nres.Raise("Cannot disconnect disks node %s" % node)
5876
5877   def _GoReconnect(self, multimaster):
5878     """Reconnect to the network.
5879
5880     """
5881     if multimaster:
5882       msg = "dual-master"
5883     else:
5884       msg = "single-master"
5885     self.feedback_fn("* changing disks into %s mode" % msg)
5886     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5887                                            self.instance.disks,
5888                                            self.instance.name, multimaster)
5889     for node, nres in result.items():
5890       nres.Raise("Cannot change disks config on node %s" % node)
5891
5892   def _ExecCleanup(self):
5893     """Try to cleanup after a failed migration.
5894
5895     The cleanup is done by:
5896       - check that the instance is running only on one node
5897         (and update the config if needed)
5898       - change disks on its secondary node to secondary
5899       - wait until disks are fully synchronized
5900       - disconnect from the network
5901       - change disks into single-master mode
5902       - wait again until disks are fully synchronized
5903
5904     """
5905     instance = self.instance
5906     target_node = self.target_node
5907     source_node = self.source_node
5908
5909     # check running on only one node
5910     self.feedback_fn("* checking where the instance actually runs"
5911                      " (if this hangs, the hypervisor might be in"
5912                      " a bad state)")
5913     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5914     for node, result in ins_l.items():
5915       result.Raise("Can't contact node %s" % node)
5916
5917     runningon_source = instance.name in ins_l[source_node].payload
5918     runningon_target = instance.name in ins_l[target_node].payload
5919
5920     if runningon_source and runningon_target:
5921       raise errors.OpExecError("Instance seems to be running on two nodes,"
5922                                " or the hypervisor is confused. You will have"
5923                                " to ensure manually that it runs only on one"
5924                                " and restart this operation.")
5925
5926     if not (runningon_source or runningon_target):
5927       raise errors.OpExecError("Instance does not seem to be running at all."
5928                                " In this case, it's safer to repair by"
5929                                " running 'gnt-instance stop' to ensure disk"
5930                                " shutdown, and then restarting it.")
5931
5932     if runningon_target:
5933       # the migration has actually succeeded, we need to update the config
5934       self.feedback_fn("* instance running on secondary node (%s),"
5935                        " updating config" % target_node)
5936       instance.primary_node = target_node
5937       self.cfg.Update(instance, self.feedback_fn)
5938       demoted_node = source_node
5939     else:
5940       self.feedback_fn("* instance confirmed to be running on its"
5941                        " primary node (%s)" % source_node)
5942       demoted_node = target_node
5943
5944     self._EnsureSecondary(demoted_node)
5945     try:
5946       self._WaitUntilSync()
5947     except errors.OpExecError:
5948       # we ignore here errors, since if the device is standalone, it
5949       # won't be able to sync
5950       pass
5951     self._GoStandalone()
5952     self._GoReconnect(False)
5953     self._WaitUntilSync()
5954
5955     self.feedback_fn("* done")
5956
5957   def _RevertDiskStatus(self):
5958     """Try to revert the disk status after a failed migration.
5959
5960     """
5961     target_node = self.target_node
5962     try:
5963       self._EnsureSecondary(target_node)
5964       self._GoStandalone()
5965       self._GoReconnect(False)
5966       self._WaitUntilSync()
5967     except errors.OpExecError, err:
5968       self.lu.LogWarning("Migration failed and I can't reconnect the"
5969                          " drives: error '%s'\n"
5970                          "Please look and recover the instance status" %
5971                          str(err))
5972
5973   def _AbortMigration(self):
5974     """Call the hypervisor code to abort a started migration.
5975
5976     """
5977     instance = self.instance
5978     target_node = self.target_node
5979     migration_info = self.migration_info
5980
5981     abort_result = self.rpc.call_finalize_migration(target_node,
5982                                                     instance,
5983                                                     migration_info,
5984                                                     False)
5985     abort_msg = abort_result.fail_msg
5986     if abort_msg:
5987       logging.error("Aborting migration failed on target node %s: %s",
5988                     target_node, abort_msg)
5989       # Don't raise an exception here, as we stil have to try to revert the
5990       # disk status, even if this step failed.
5991
5992   def _ExecMigration(self):
5993     """Migrate an instance.
5994
5995     The migrate is done by:
5996       - change the disks into dual-master mode
5997       - wait until disks are fully synchronized again
5998       - migrate the instance
5999       - change disks on the new secondary node (the old primary) to secondary
6000       - wait until disks are fully synchronized
6001       - change disks into single-master mode
6002
6003     """
6004     instance = self.instance
6005     target_node = self.target_node
6006     source_node = self.source_node
6007
6008     self.feedback_fn("* checking disk consistency between source and target")
6009     for dev in instance.disks:
6010       if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6011         raise errors.OpExecError("Disk %s is degraded or not fully"
6012                                  " synchronized on target node,"
6013                                  " aborting migrate." % dev.iv_name)
6014
6015     # First get the migration information from the remote node
6016     result = self.rpc.call_migration_info(source_node, instance)
6017     msg = result.fail_msg
6018     if msg:
6019       log_err = ("Failed fetching source migration information from %s: %s" %
6020                  (source_node, msg))
6021       logging.error(log_err)
6022       raise errors.OpExecError(log_err)
6023
6024     self.migration_info = migration_info = result.payload
6025
6026     # Then switch the disks to master/master mode
6027     self._EnsureSecondary(target_node)
6028     self._GoStandalone()
6029     self._GoReconnect(True)
6030     self._WaitUntilSync()
6031
6032     self.feedback_fn("* preparing %s to accept the instance" % target_node)
6033     result = self.rpc.call_accept_instance(target_node,
6034                                            instance,
6035                                            migration_info,
6036                                            self.nodes_ip[target_node])
6037
6038     msg = result.fail_msg
6039     if msg:
6040       logging.error("Instance pre-migration failed, trying to revert"
6041                     " disk status: %s", msg)
6042       self.feedback_fn("Pre-migration failed, aborting")
6043       self._AbortMigration()
6044       self._RevertDiskStatus()
6045       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6046                                (instance.name, msg))
6047
6048     self.feedback_fn("* migrating instance to %s" % target_node)
6049     time.sleep(10)
6050     result = self.rpc.call_instance_migrate(source_node, instance,
6051                                             self.nodes_ip[target_node],
6052                                             self.live)
6053     msg = result.fail_msg
6054     if msg:
6055       logging.error("Instance migration failed, trying to revert"
6056                     " disk status: %s", msg)
6057       self.feedback_fn("Migration failed, aborting")
6058       self._AbortMigration()
6059       self._RevertDiskStatus()
6060       raise errors.OpExecError("Could not migrate instance %s: %s" %
6061                                (instance.name, msg))
6062     time.sleep(10)
6063
6064     instance.primary_node = target_node
6065     # distribute new instance config to the other nodes
6066     self.cfg.Update(instance, self.feedback_fn)
6067
6068     result = self.rpc.call_finalize_migration(target_node,
6069                                               instance,
6070                                               migration_info,
6071                                               True)
6072     msg = result.fail_msg
6073     if msg:
6074       logging.error("Instance migration succeeded, but finalization failed:"
6075                     " %s", msg)
6076       raise errors.OpExecError("Could not finalize instance migration: %s" %
6077                                msg)
6078
6079     self._EnsureSecondary(source_node)
6080     self._WaitUntilSync()
6081     self._GoStandalone()
6082     self._GoReconnect(False)
6083     self._WaitUntilSync()
6084
6085     self.feedback_fn("* done")
6086
6087   def Exec(self, feedback_fn):
6088     """Perform the migration.
6089
6090     """
6091     feedback_fn("Migrating instance %s" % self.instance.name)
6092
6093     self.feedback_fn = feedback_fn
6094
6095     self.source_node = self.instance.primary_node
6096     self.target_node = self.instance.secondary_nodes[0]
6097     self.all_nodes = [self.source_node, self.target_node]
6098     self.nodes_ip = {
6099       self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6100       self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6101       }
6102
6103     if self.cleanup:
6104       return self._ExecCleanup()
6105     else:
6106       return self._ExecMigration()
6107
6108
6109 def _CreateBlockDev(lu, node, instance, device, force_create,
6110                     info, force_open):
6111   """Create a tree of block devices on a given node.
6112
6113   If this device type has to be created on secondaries, create it and
6114   all its children.
6115
6116   If not, just recurse to children keeping the same 'force' value.
6117
6118   @param lu: the lu on whose behalf we execute
6119   @param node: the node on which to create the device
6120   @type instance: L{objects.Instance}
6121   @param instance: the instance which owns the device
6122   @type device: L{objects.Disk}
6123   @param device: the device to create
6124   @type force_create: boolean
6125   @param force_create: whether to force creation of this device; this
6126       will be change to True whenever we find a device which has
6127       CreateOnSecondary() attribute
6128   @param info: the extra 'metadata' we should attach to the device
6129       (this will be represented as a LVM tag)
6130   @type force_open: boolean
6131   @param force_open: this parameter will be passes to the
6132       L{backend.BlockdevCreate} function where it specifies
6133       whether we run on primary or not, and it affects both
6134       the child assembly and the device own Open() execution
6135
6136   """
6137   if device.CreateOnSecondary():
6138     force_create = True
6139
6140   if device.children:
6141     for child in device.children:
6142       _CreateBlockDev(lu, node, instance, child, force_create,
6143                       info, force_open)
6144
6145   if not force_create:
6146     return
6147
6148   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6149
6150
6151 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6152   """Create a single block device on a given node.
6153
6154   This will not recurse over children of the device, so they must be
6155   created in advance.
6156
6157   @param lu: the lu on whose behalf we execute
6158   @param node: the node on which to create the device
6159   @type instance: L{objects.Instance}
6160   @param instance: the instance which owns the device
6161   @type device: L{objects.Disk}
6162   @param device: the device to create
6163   @param info: the extra 'metadata' we should attach to the device
6164       (this will be represented as a LVM tag)
6165   @type force_open: boolean
6166   @param force_open: this parameter will be passes to the
6167       L{backend.BlockdevCreate} function where it specifies
6168       whether we run on primary or not, and it affects both
6169       the child assembly and the device own Open() execution
6170
6171   """
6172   lu.cfg.SetDiskID(device, node)
6173   result = lu.rpc.call_blockdev_create(node, device, device.size,
6174                                        instance.name, force_open, info)
6175   result.Raise("Can't create block device %s on"
6176                " node %s for instance %s" % (device, node, instance.name))
6177   if device.physical_id is None:
6178     device.physical_id = result.payload
6179
6180
6181 def _GenerateUniqueNames(lu, exts):
6182   """Generate a suitable LV name.
6183
6184   This will generate a logical volume name for the given instance.
6185
6186   """
6187   results = []
6188   for val in exts:
6189     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6190     results.append("%s%s" % (new_id, val))
6191   return results
6192
6193
6194 def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
6195                          p_minor, s_minor):
6196   """Generate a drbd8 device complete with its children.
6197
6198   """
6199   port = lu.cfg.AllocatePort()
6200   vgname = lu.cfg.GetVGName()
6201   shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6202   dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6203                           logical_id=(vgname, names[0]))
6204   dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6205                           logical_id=(vgname, names[1]))
6206   drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6207                           logical_id=(primary, secondary, port,
6208                                       p_minor, s_minor,
6209                                       shared_secret),
6210                           children=[dev_data, dev_meta],
6211                           iv_name=iv_name)
6212   return drbd_dev
6213
6214
6215 def _GenerateDiskTemplate(lu, template_name,
6216                           instance_name, primary_node,
6217                           secondary_nodes, disk_info,
6218                           file_storage_dir, file_driver,
6219                           base_index):
6220   """Generate the entire disk layout for a given template type.
6221
6222   """
6223   #TODO: compute space requirements
6224
6225   vgname = lu.cfg.GetVGName()
6226   disk_count = len(disk_info)
6227   disks = []
6228   if template_name == constants.DT_DISKLESS:
6229     pass
6230   elif template_name == constants.DT_PLAIN:
6231     if len(secondary_nodes) != 0:
6232       raise errors.ProgrammerError("Wrong template configuration")
6233
6234     names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6235                                       for i in range(disk_count)])
6236     for idx, disk in enumerate(disk_info):
6237       disk_index = idx + base_index
6238       disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6239                               logical_id=(vgname, names[idx]),
6240                               iv_name="disk/%d" % disk_index,
6241                               mode=disk["mode"])
6242       disks.append(disk_dev)
6243   elif template_name == constants.DT_DRBD8:
6244     if len(secondary_nodes) != 1:
6245       raise errors.ProgrammerError("Wrong template configuration")
6246     remote_node = secondary_nodes[0]
6247     minors = lu.cfg.AllocateDRBDMinor(
6248       [primary_node, remote_node] * len(disk_info), instance_name)
6249
6250     names = []
6251     for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6252                                                for i in range(disk_count)]):
6253       names.append(lv_prefix + "_data")
6254       names.append(lv_prefix + "_meta")
6255     for idx, disk in enumerate(disk_info):
6256       disk_index = idx + base_index
6257       disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6258                                       disk["size"], names[idx*2:idx*2+2],
6259                                       "disk/%d" % disk_index,
6260                                       minors[idx*2], minors[idx*2+1])
6261       disk_dev.mode = disk["mode"]
6262       disks.append(disk_dev)
6263   elif template_name == constants.DT_FILE:
6264     if len(secondary_nodes) != 0:
6265       raise errors.ProgrammerError("Wrong template configuration")
6266
6267     _RequireFileStorage()
6268
6269     for idx, disk in enumerate(disk_info):
6270       disk_index = idx + base_index
6271       disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6272                               iv_name="disk/%d" % disk_index,
6273                               logical_id=(file_driver,
6274                                           "%s/disk%d" % (file_storage_dir,
6275                                                          disk_index)),
6276                               mode=disk["mode"])
6277       disks.append(disk_dev)
6278   else:
6279     raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6280   return disks
6281
6282
6283 def _GetInstanceInfoText(instance):
6284   """Compute that text that should be added to the disk's metadata.
6285
6286   """
6287   return "originstname+%s" % instance.name
6288
6289
6290 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6291   """Create all disks for an instance.
6292
6293   This abstracts away some work from AddInstance.
6294
6295   @type lu: L{LogicalUnit}
6296   @param lu: the logical unit on whose behalf we execute
6297   @type instance: L{objects.Instance}
6298   @param instance: the instance whose disks we should create
6299   @type to_skip: list
6300   @param to_skip: list of indices to skip
6301   @type target_node: string
6302   @param target_node: if passed, overrides the target node for creation
6303   @rtype: boolean
6304   @return: the success of the creation
6305
6306   """
6307   info = _GetInstanceInfoText(instance)
6308   if target_node is None:
6309     pnode = instance.primary_node
6310     all_nodes = instance.all_nodes
6311   else:
6312     pnode = target_node
6313     all_nodes = [pnode]
6314
6315   if instance.disk_template == constants.DT_FILE:
6316     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6317     result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6318
6319     result.Raise("Failed to create directory '%s' on"
6320                  " node %s" % (file_storage_dir, pnode))
6321
6322   # Note: this needs to be kept in sync with adding of disks in
6323   # LUSetInstanceParams
6324   for idx, device in enumerate(instance.disks):
6325     if to_skip and idx in to_skip:
6326       continue
6327     logging.info("Creating volume %s for instance %s",
6328                  device.iv_name, instance.name)
6329     #HARDCODE
6330     for node in all_nodes:
6331       f_create = node == pnode
6332       _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6333
6334
6335 def _RemoveDisks(lu, instance, target_node=None):
6336   """Remove all disks for an instance.
6337
6338   This abstracts away some work from `AddInstance()` and
6339   `RemoveInstance()`. Note that in case some of the devices couldn't
6340   be removed, the removal will continue with the other ones (compare
6341   with `_CreateDisks()`).
6342
6343   @type lu: L{LogicalUnit}
6344   @param lu: the logical unit on whose behalf we execute
6345   @type instance: L{objects.Instance}
6346   @param instance: the instance whose disks we should remove
6347   @type target_node: string
6348   @param target_node: used to override the node on which to remove the disks
6349   @rtype: boolean
6350   @return: the success of the removal
6351
6352   """
6353   logging.info("Removing block devices for instance %s", instance.name)
6354
6355   all_result = True
6356   for device in instance.disks:
6357     if target_node:
6358       edata = [(target_node, device)]
6359     else:
6360       edata = device.ComputeNodeTree(instance.primary_node)
6361     for node, disk in edata:
6362       lu.cfg.SetDiskID(disk, node)
6363       msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6364       if msg:
6365         lu.LogWarning("Could not remove block device %s on node %s,"
6366                       " continuing anyway: %s", device.iv_name, node, msg)
6367         all_result = False
6368
6369   if instance.disk_template == constants.DT_FILE:
6370     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6371     if target_node:
6372       tgt = target_node
6373     else:
6374       tgt = instance.primary_node
6375     result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6376     if result.fail_msg:
6377       lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6378                     file_storage_dir, instance.primary_node, result.fail_msg)
6379       all_result = False
6380
6381   return all_result
6382
6383
6384 def _ComputeDiskSize(disk_template, disks):
6385   """Compute disk size requirements in the volume group
6386
6387   """
6388   # Required free disk space as a function of disk and swap space
6389   req_size_dict = {
6390     constants.DT_DISKLESS: None,
6391     constants.DT_PLAIN: sum(d["size"] for d in disks),
6392     # 128 MB are added for drbd metadata for each disk
6393     constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6394     constants.DT_FILE: None,
6395   }
6396
6397   if disk_template not in req_size_dict:
6398     raise errors.ProgrammerError("Disk template '%s' size requirement"
6399                                  " is unknown" %  disk_template)
6400
6401   return req_size_dict[disk_template]
6402
6403
6404 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6405   """Hypervisor parameter validation.
6406
6407   This function abstract the hypervisor parameter validation to be
6408   used in both instance create and instance modify.
6409
6410   @type lu: L{LogicalUnit}
6411   @param lu: the logical unit for which we check
6412   @type nodenames: list
6413   @param nodenames: the list of nodes on which we should check
6414   @type hvname: string
6415   @param hvname: the name of the hypervisor we should use
6416   @type hvparams: dict
6417   @param hvparams: the parameters which we need to check
6418   @raise errors.OpPrereqError: if the parameters are not valid
6419
6420   """
6421   hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6422                                                   hvname,
6423                                                   hvparams)
6424   for node in nodenames:
6425     info = hvinfo[node]
6426     if info.offline:
6427       continue
6428     info.Raise("Hypervisor parameter validation failed on node %s" % node)
6429
6430
6431 def _CheckOSParams(lu, required, nodenames, osname, osparams):
6432   """OS parameters validation.
6433
6434   @type lu: L{LogicalUnit}
6435   @param lu: the logical unit for which we check
6436   @type required: boolean
6437   @param required: whether the validation should fail if the OS is not
6438       found
6439   @type nodenames: list
6440   @param nodenames: the list of nodes on which we should check
6441   @type osname: string
6442   @param osname: the name of the hypervisor we should use
6443   @type osparams: dict
6444   @param osparams: the parameters which we need to check
6445   @raise errors.OpPrereqError: if the parameters are not valid
6446
6447   """
6448   result = lu.rpc.call_os_validate(required, nodenames, osname,
6449                                    [constants.OS_VALIDATE_PARAMETERS],
6450                                    osparams)
6451   for node, nres in result.items():
6452     # we don't check for offline cases since this should be run only
6453     # against the master node and/or an instance's nodes
6454     nres.Raise("OS Parameters validation failed on node %s" % node)
6455     if not nres.payload:
6456       lu.LogInfo("OS %s not found on node %s, validation skipped",
6457                  osname, node)
6458
6459
6460 class LUCreateInstance(LogicalUnit):
6461   """Create an instance.
6462
6463   """
6464   HPATH = "instance-add"
6465   HTYPE = constants.HTYPE_INSTANCE
6466   _OP_PARAMS = [
6467     _PInstanceName,
6468     ("mode", ht.NoDefault, ht.TElemOf(constants.INSTANCE_CREATE_MODES)),
6469     ("start", True, ht.TBool),
6470     ("wait_for_sync", True, ht.TBool),
6471     ("ip_check", True, ht.TBool),
6472     ("name_check", True, ht.TBool),
6473     ("disks", ht.NoDefault, ht.TListOf(ht.TDict)),
6474     ("nics", ht.NoDefault, ht.TListOf(ht.TDict)),
6475     ("hvparams", ht.EmptyDict, ht.TDict),
6476     ("beparams", ht.EmptyDict, ht.TDict),
6477     ("osparams", ht.EmptyDict, ht.TDict),
6478     ("no_install", None, ht.TMaybeBool),
6479     ("os_type", None, ht.TMaybeString),
6480     ("force_variant", False, ht.TBool),
6481     ("source_handshake", None, ht.TOr(ht.TList, ht.TNone)),
6482     ("source_x509_ca", None, ht.TMaybeString),
6483     ("source_instance_name", None, ht.TMaybeString),
6484     ("src_node", None, ht.TMaybeString),
6485     ("src_path", None, ht.TMaybeString),
6486     ("pnode", None, ht.TMaybeString),
6487     ("snode", None, ht.TMaybeString),
6488     ("iallocator", None, ht.TMaybeString),
6489     ("hypervisor", None, ht.TMaybeString),
6490     ("disk_template", ht.NoDefault, _CheckDiskTemplate),
6491     ("identify_defaults", False, ht.TBool),
6492     ("file_driver", None, ht.TOr(ht.TNone, ht.TElemOf(constants.FILE_DRIVER))),
6493     ("file_storage_dir", None, ht.TMaybeString),
6494     ]
6495   REQ_BGL = False
6496
6497   def CheckArguments(self):
6498     """Check arguments.
6499
6500     """
6501     # do not require name_check to ease forward/backward compatibility
6502     # for tools
6503     if self.op.no_install and self.op.start:
6504       self.LogInfo("No-installation mode selected, disabling startup")
6505       self.op.start = False
6506     # validate/normalize the instance name
6507     self.op.instance_name = \
6508       netutils.Hostname.GetNormalizedName(self.op.instance_name)
6509
6510     if self.op.ip_check and not self.op.name_check:
6511       # TODO: make the ip check more flexible and not depend on the name check
6512       raise errors.OpPrereqError("Cannot do ip check without a name check",
6513                                  errors.ECODE_INVAL)
6514
6515     # check nics' parameter names
6516     for nic in self.op.nics:
6517       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6518
6519     # check disks. parameter names and consistent adopt/no-adopt strategy
6520     has_adopt = has_no_adopt = False
6521     for disk in self.op.disks:
6522       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6523       if "adopt" in disk:
6524         has_adopt = True
6525       else:
6526         has_no_adopt = True
6527     if has_adopt and has_no_adopt:
6528       raise errors.OpPrereqError("Either all disks are adopted or none is",
6529                                  errors.ECODE_INVAL)
6530     if has_adopt:
6531       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6532         raise errors.OpPrereqError("Disk adoption is not supported for the"
6533                                    " '%s' disk template" %
6534                                    self.op.disk_template,
6535                                    errors.ECODE_INVAL)
6536       if self.op.iallocator is not None:
6537         raise errors.OpPrereqError("Disk adoption not allowed with an"
6538                                    " iallocator script", errors.ECODE_INVAL)
6539       if self.op.mode == constants.INSTANCE_IMPORT:
6540         raise errors.OpPrereqError("Disk adoption not allowed for"
6541                                    " instance import", errors.ECODE_INVAL)
6542
6543     self.adopt_disks = has_adopt
6544
6545     # instance name verification
6546     if self.op.name_check:
6547       self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
6548       self.op.instance_name = self.hostname1.name
6549       # used in CheckPrereq for ip ping check
6550       self.check_ip = self.hostname1.ip
6551     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6552       raise errors.OpPrereqError("Remote imports require names to be checked" %
6553                                  errors.ECODE_INVAL)
6554     else:
6555       self.check_ip = None
6556
6557     # file storage checks
6558     if (self.op.file_driver and
6559         not self.op.file_driver in constants.FILE_DRIVER):
6560       raise errors.OpPrereqError("Invalid file driver name '%s'" %
6561                                  self.op.file_driver, errors.ECODE_INVAL)
6562
6563     if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6564       raise errors.OpPrereqError("File storage directory path not absolute",
6565                                  errors.ECODE_INVAL)
6566
6567     ### Node/iallocator related checks
6568     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
6569
6570     if self.op.pnode is not None:
6571       if self.op.disk_template in constants.DTS_NET_MIRROR:
6572         if self.op.snode is None:
6573           raise errors.OpPrereqError("The networked disk templates need"
6574                                      " a mirror node", errors.ECODE_INVAL)
6575       elif self.op.snode:
6576         self.LogWarning("Secondary node will be ignored on non-mirrored disk"
6577                         " template")
6578         self.op.snode = None
6579
6580     self._cds = _GetClusterDomainSecret()
6581
6582     if self.op.mode == constants.INSTANCE_IMPORT:
6583       # On import force_variant must be True, because if we forced it at
6584       # initial install, our only chance when importing it back is that it
6585       # works again!
6586       self.op.force_variant = True
6587
6588       if self.op.no_install:
6589         self.LogInfo("No-installation mode has no effect during import")
6590
6591     elif self.op.mode == constants.INSTANCE_CREATE:
6592       if self.op.os_type is None:
6593         raise errors.OpPrereqError("No guest OS specified",
6594                                    errors.ECODE_INVAL)
6595       if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
6596         raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
6597                                    " installation" % self.op.os_type,
6598                                    errors.ECODE_STATE)
6599       if self.op.disk_template is None:
6600         raise errors.OpPrereqError("No disk template specified",
6601                                    errors.ECODE_INVAL)
6602
6603     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6604       # Check handshake to ensure both clusters have the same domain secret
6605       src_handshake = self.op.source_handshake
6606       if not src_handshake:
6607         raise errors.OpPrereqError("Missing source handshake",
6608                                    errors.ECODE_INVAL)
6609
6610       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6611                                                            src_handshake)
6612       if errmsg:
6613         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6614                                    errors.ECODE_INVAL)
6615
6616       # Load and check source CA
6617       self.source_x509_ca_pem = self.op.source_x509_ca
6618       if not self.source_x509_ca_pem:
6619         raise errors.OpPrereqError("Missing source X509 CA",
6620                                    errors.ECODE_INVAL)
6621
6622       try:
6623         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6624                                                     self._cds)
6625       except OpenSSL.crypto.Error, err:
6626         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6627                                    (err, ), errors.ECODE_INVAL)
6628
6629       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6630       if errcode is not None:
6631         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6632                                    errors.ECODE_INVAL)
6633
6634       self.source_x509_ca = cert
6635
6636       src_instance_name = self.op.source_instance_name
6637       if not src_instance_name:
6638         raise errors.OpPrereqError("Missing source instance name",
6639                                    errors.ECODE_INVAL)
6640
6641       self.source_instance_name = \
6642           netutils.GetHostname(name=src_instance_name).name
6643
6644     else:
6645       raise errors.OpPrereqError("Invalid instance creation mode %r" %
6646                                  self.op.mode, errors.ECODE_INVAL)
6647
6648   def ExpandNames(self):
6649     """ExpandNames for CreateInstance.
6650
6651     Figure out the right locks for instance creation.
6652
6653     """
6654     self.needed_locks = {}
6655
6656     instance_name = self.op.instance_name
6657     # this is just a preventive check, but someone might still add this
6658     # instance in the meantime, and creation will fail at lock-add time
6659     if instance_name in self.cfg.GetInstanceList():
6660       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6661                                  instance_name, errors.ECODE_EXISTS)
6662
6663     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6664
6665     if self.op.iallocator:
6666       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6667     else:
6668       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6669       nodelist = [self.op.pnode]
6670       if self.op.snode is not None:
6671         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6672         nodelist.append(self.op.snode)
6673       self.needed_locks[locking.LEVEL_NODE] = nodelist
6674
6675     # in case of import lock the source node too
6676     if self.op.mode == constants.INSTANCE_IMPORT:
6677       src_node = self.op.src_node
6678       src_path = self.op.src_path
6679
6680       if src_path is None:
6681         self.op.src_path = src_path = self.op.instance_name
6682
6683       if src_node is None:
6684         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6685         self.op.src_node = None
6686         if os.path.isabs(src_path):
6687           raise errors.OpPrereqError("Importing an instance from an absolute"
6688                                      " path requires a source node option.",
6689                                      errors.ECODE_INVAL)
6690       else:
6691         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6692         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6693           self.needed_locks[locking.LEVEL_NODE].append(src_node)
6694         if not os.path.isabs(src_path):
6695           self.op.src_path = src_path = \
6696             utils.PathJoin(constants.EXPORT_DIR, src_path)
6697
6698   def _RunAllocator(self):
6699     """Run the allocator based on input opcode.
6700
6701     """
6702     nics = [n.ToDict() for n in self.nics]
6703     ial = IAllocator(self.cfg, self.rpc,
6704                      mode=constants.IALLOCATOR_MODE_ALLOC,
6705                      name=self.op.instance_name,
6706                      disk_template=self.op.disk_template,
6707                      tags=[],
6708                      os=self.op.os_type,
6709                      vcpus=self.be_full[constants.BE_VCPUS],
6710                      mem_size=self.be_full[constants.BE_MEMORY],
6711                      disks=self.disks,
6712                      nics=nics,
6713                      hypervisor=self.op.hypervisor,
6714                      )
6715
6716     ial.Run(self.op.iallocator)
6717
6718     if not ial.success:
6719       raise errors.OpPrereqError("Can't compute nodes using"
6720                                  " iallocator '%s': %s" %
6721                                  (self.op.iallocator, ial.info),
6722                                  errors.ECODE_NORES)
6723     if len(ial.result) != ial.required_nodes:
6724       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6725                                  " of nodes (%s), required %s" %
6726                                  (self.op.iallocator, len(ial.result),
6727                                   ial.required_nodes), errors.ECODE_FAULT)
6728     self.op.pnode = ial.result[0]
6729     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6730                  self.op.instance_name, self.op.iallocator,
6731                  utils.CommaJoin(ial.result))
6732     if ial.required_nodes == 2:
6733       self.op.snode = ial.result[1]
6734
6735   def BuildHooksEnv(self):
6736     """Build hooks env.
6737
6738     This runs on master, primary and secondary nodes of the instance.
6739
6740     """
6741     env = {
6742       "ADD_MODE": self.op.mode,
6743       }
6744     if self.op.mode == constants.INSTANCE_IMPORT:
6745       env["SRC_NODE"] = self.op.src_node
6746       env["SRC_PATH"] = self.op.src_path
6747       env["SRC_IMAGES"] = self.src_images
6748
6749     env.update(_BuildInstanceHookEnv(
6750       name=self.op.instance_name,
6751       primary_node=self.op.pnode,
6752       secondary_nodes=self.secondaries,
6753       status=self.op.start,
6754       os_type=self.op.os_type,
6755       memory=self.be_full[constants.BE_MEMORY],
6756       vcpus=self.be_full[constants.BE_VCPUS],
6757       nics=_NICListToTuple(self, self.nics),
6758       disk_template=self.op.disk_template,
6759       disks=[(d["size"], d["mode"]) for d in self.disks],
6760       bep=self.be_full,
6761       hvp=self.hv_full,
6762       hypervisor_name=self.op.hypervisor,
6763     ))
6764
6765     nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6766           self.secondaries)
6767     return env, nl, nl
6768
6769   def _ReadExportInfo(self):
6770     """Reads the export information from disk.
6771
6772     It will override the opcode source node and path with the actual
6773     information, if these two were not specified before.
6774
6775     @return: the export information
6776
6777     """
6778     assert self.op.mode == constants.INSTANCE_IMPORT
6779
6780     src_node = self.op.src_node
6781     src_path = self.op.src_path
6782
6783     if src_node is None:
6784       locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6785       exp_list = self.rpc.call_export_list(locked_nodes)
6786       found = False
6787       for node in exp_list:
6788         if exp_list[node].fail_msg:
6789           continue
6790         if src_path in exp_list[node].payload:
6791           found = True
6792           self.op.src_node = src_node = node
6793           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6794                                                        src_path)
6795           break
6796       if not found:
6797         raise errors.OpPrereqError("No export found for relative path %s" %
6798                                     src_path, errors.ECODE_INVAL)
6799
6800     _CheckNodeOnline(self, src_node)
6801     result = self.rpc.call_export_info(src_node, src_path)
6802     result.Raise("No export or invalid export found in dir %s" % src_path)
6803
6804     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6805     if not export_info.has_section(constants.INISECT_EXP):
6806       raise errors.ProgrammerError("Corrupted export config",
6807                                    errors.ECODE_ENVIRON)
6808
6809     ei_version = export_info.get(constants.INISECT_EXP, "version")
6810     if (int(ei_version) != constants.EXPORT_VERSION):
6811       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6812                                  (ei_version, constants.EXPORT_VERSION),
6813                                  errors.ECODE_ENVIRON)
6814     return export_info
6815
6816   def _ReadExportParams(self, einfo):
6817     """Use export parameters as defaults.
6818
6819     In case the opcode doesn't specify (as in override) some instance
6820     parameters, then try to use them from the export information, if
6821     that declares them.
6822
6823     """
6824     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6825
6826     if self.op.disk_template is None:
6827       if einfo.has_option(constants.INISECT_INS, "disk_template"):
6828         self.op.disk_template = einfo.get(constants.INISECT_INS,
6829                                           "disk_template")
6830       else:
6831         raise errors.OpPrereqError("No disk template specified and the export"
6832                                    " is missing the disk_template information",
6833                                    errors.ECODE_INVAL)
6834
6835     if not self.op.disks:
6836       if einfo.has_option(constants.INISECT_INS, "disk_count"):
6837         disks = []
6838         # TODO: import the disk iv_name too
6839         for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6840           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6841           disks.append({"size": disk_sz})
6842         self.op.disks = disks
6843       else:
6844         raise errors.OpPrereqError("No disk info specified and the export"
6845                                    " is missing the disk information",
6846                                    errors.ECODE_INVAL)
6847
6848     if (not self.op.nics and
6849         einfo.has_option(constants.INISECT_INS, "nic_count")):
6850       nics = []
6851       for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6852         ndict = {}
6853         for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6854           v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6855           ndict[name] = v
6856         nics.append(ndict)
6857       self.op.nics = nics
6858
6859     if (self.op.hypervisor is None and
6860         einfo.has_option(constants.INISECT_INS, "hypervisor")):
6861       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6862     if einfo.has_section(constants.INISECT_HYP):
6863       # use the export parameters but do not override the ones
6864       # specified by the user
6865       for name, value in einfo.items(constants.INISECT_HYP):
6866         if name not in self.op.hvparams:
6867           self.op.hvparams[name] = value
6868
6869     if einfo.has_section(constants.INISECT_BEP):
6870       # use the parameters, without overriding
6871       for name, value in einfo.items(constants.INISECT_BEP):
6872         if name not in self.op.beparams:
6873           self.op.beparams[name] = value
6874     else:
6875       # try to read the parameters old style, from the main section
6876       for name in constants.BES_PARAMETERS:
6877         if (name not in self.op.beparams and
6878             einfo.has_option(constants.INISECT_INS, name)):
6879           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6880
6881     if einfo.has_section(constants.INISECT_OSP):
6882       # use the parameters, without overriding
6883       for name, value in einfo.items(constants.INISECT_OSP):
6884         if name not in self.op.osparams:
6885           self.op.osparams[name] = value
6886
6887   def _RevertToDefaults(self, cluster):
6888     """Revert the instance parameters to the default values.
6889
6890     """
6891     # hvparams
6892     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
6893     for name in self.op.hvparams.keys():
6894       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6895         del self.op.hvparams[name]
6896     # beparams
6897     be_defs = cluster.SimpleFillBE({})
6898     for name in self.op.beparams.keys():
6899       if name in be_defs and be_defs[name] == self.op.beparams[name]:
6900         del self.op.beparams[name]
6901     # nic params
6902     nic_defs = cluster.SimpleFillNIC({})
6903     for nic in self.op.nics:
6904       for name in constants.NICS_PARAMETERS:
6905         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
6906           del nic[name]
6907     # osparams
6908     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
6909     for name in self.op.osparams.keys():
6910       if name in os_defs and os_defs[name] == self.op.osparams[name]:
6911         del self.op.osparams[name]
6912
6913   def CheckPrereq(self):
6914     """Check prerequisites.
6915
6916     """
6917     if self.op.mode == constants.INSTANCE_IMPORT:
6918       export_info = self._ReadExportInfo()
6919       self._ReadExportParams(export_info)
6920
6921     _CheckDiskTemplate(self.op.disk_template)
6922
6923     if (not self.cfg.GetVGName() and
6924         self.op.disk_template not in constants.DTS_NOT_LVM):
6925       raise errors.OpPrereqError("Cluster does not support lvm-based"
6926                                  " instances", errors.ECODE_STATE)
6927
6928     if self.op.hypervisor is None:
6929       self.op.hypervisor = self.cfg.GetHypervisorType()
6930
6931     cluster = self.cfg.GetClusterInfo()
6932     enabled_hvs = cluster.enabled_hypervisors
6933     if self.op.hypervisor not in enabled_hvs:
6934       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6935                                  " cluster (%s)" % (self.op.hypervisor,
6936                                   ",".join(enabled_hvs)),
6937                                  errors.ECODE_STATE)
6938
6939     # check hypervisor parameter syntax (locally)
6940     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6941     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
6942                                       self.op.hvparams)
6943     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6944     hv_type.CheckParameterSyntax(filled_hvp)
6945     self.hv_full = filled_hvp
6946     # check that we don't specify global parameters on an instance
6947     _CheckGlobalHvParams(self.op.hvparams)
6948
6949     # fill and remember the beparams dict
6950     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6951     self.be_full = cluster.SimpleFillBE(self.op.beparams)
6952
6953     # build os parameters
6954     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
6955
6956     # now that hvp/bep are in final format, let's reset to defaults,
6957     # if told to do so
6958     if self.op.identify_defaults:
6959       self._RevertToDefaults(cluster)
6960
6961     # NIC buildup
6962     self.nics = []
6963     for idx, nic in enumerate(self.op.nics):
6964       nic_mode_req = nic.get("mode", None)
6965       nic_mode = nic_mode_req
6966       if nic_mode is None:
6967         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6968
6969       # in routed mode, for the first nic, the default ip is 'auto'
6970       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6971         default_ip_mode = constants.VALUE_AUTO
6972       else:
6973         default_ip_mode = constants.VALUE_NONE
6974
6975       # ip validity checks
6976       ip = nic.get("ip", default_ip_mode)
6977       if ip is None or ip.lower() == constants.VALUE_NONE:
6978         nic_ip = None
6979       elif ip.lower() == constants.VALUE_AUTO:
6980         if not self.op.name_check:
6981           raise errors.OpPrereqError("IP address set to auto but name checks"
6982                                      " have been skipped",
6983                                      errors.ECODE_INVAL)
6984         nic_ip = self.hostname1.ip
6985       else:
6986         if not netutils.IPAddress.IsValid(ip):
6987           raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
6988                                      errors.ECODE_INVAL)
6989         nic_ip = ip
6990
6991       # TODO: check the ip address for uniqueness
6992       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6993         raise errors.OpPrereqError("Routed nic mode requires an ip address",
6994                                    errors.ECODE_INVAL)
6995
6996       # MAC address verification
6997       mac = nic.get("mac", constants.VALUE_AUTO)
6998       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6999         mac = utils.NormalizeAndValidateMac(mac)
7000
7001         try:
7002           self.cfg.ReserveMAC(mac, self.proc.GetECId())
7003         except errors.ReservationError:
7004           raise errors.OpPrereqError("MAC address %s already in use"
7005                                      " in cluster" % mac,
7006                                      errors.ECODE_NOTUNIQUE)
7007
7008       # bridge verification
7009       bridge = nic.get("bridge", None)
7010       link = nic.get("link", None)
7011       if bridge and link:
7012         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
7013                                    " at the same time", errors.ECODE_INVAL)
7014       elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7015         raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7016                                    errors.ECODE_INVAL)
7017       elif bridge:
7018         link = bridge
7019
7020       nicparams = {}
7021       if nic_mode_req:
7022         nicparams[constants.NIC_MODE] = nic_mode_req
7023       if link:
7024         nicparams[constants.NIC_LINK] = link
7025
7026       check_params = cluster.SimpleFillNIC(nicparams)
7027       objects.NIC.CheckParameterSyntax(check_params)
7028       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7029
7030     # disk checks/pre-build
7031     self.disks = []
7032     for disk in self.op.disks:
7033       mode = disk.get("mode", constants.DISK_RDWR)
7034       if mode not in constants.DISK_ACCESS_SET:
7035         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7036                                    mode, errors.ECODE_INVAL)
7037       size = disk.get("size", None)
7038       if size is None:
7039         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7040       try:
7041         size = int(size)
7042       except (TypeError, ValueError):
7043         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7044                                    errors.ECODE_INVAL)
7045       new_disk = {"size": size, "mode": mode}
7046       if "adopt" in disk:
7047         new_disk["adopt"] = disk["adopt"]
7048       self.disks.append(new_disk)
7049
7050     if self.op.mode == constants.INSTANCE_IMPORT:
7051
7052       # Check that the new instance doesn't have less disks than the export
7053       instance_disks = len(self.disks)
7054       export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7055       if instance_disks < export_disks:
7056         raise errors.OpPrereqError("Not enough disks to import."
7057                                    " (instance: %d, export: %d)" %
7058                                    (instance_disks, export_disks),
7059                                    errors.ECODE_INVAL)
7060
7061       disk_images = []
7062       for idx in range(export_disks):
7063         option = 'disk%d_dump' % idx
7064         if export_info.has_option(constants.INISECT_INS, option):
7065           # FIXME: are the old os-es, disk sizes, etc. useful?
7066           export_name = export_info.get(constants.INISECT_INS, option)
7067           image = utils.PathJoin(self.op.src_path, export_name)
7068           disk_images.append(image)
7069         else:
7070           disk_images.append(False)
7071
7072       self.src_images = disk_images
7073
7074       old_name = export_info.get(constants.INISECT_INS, 'name')
7075       try:
7076         exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7077       except (TypeError, ValueError), err:
7078         raise errors.OpPrereqError("Invalid export file, nic_count is not"
7079                                    " an integer: %s" % str(err),
7080                                    errors.ECODE_STATE)
7081       if self.op.instance_name == old_name:
7082         for idx, nic in enumerate(self.nics):
7083           if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7084             nic_mac_ini = 'nic%d_mac' % idx
7085             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7086
7087     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7088
7089     # ip ping checks (we use the same ip that was resolved in ExpandNames)
7090     if self.op.ip_check:
7091       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7092         raise errors.OpPrereqError("IP %s of instance %s already in use" %
7093                                    (self.check_ip, self.op.instance_name),
7094                                    errors.ECODE_NOTUNIQUE)
7095
7096     #### mac address generation
7097     # By generating here the mac address both the allocator and the hooks get
7098     # the real final mac address rather than the 'auto' or 'generate' value.
7099     # There is a race condition between the generation and the instance object
7100     # creation, which means that we know the mac is valid now, but we're not
7101     # sure it will be when we actually add the instance. If things go bad
7102     # adding the instance will abort because of a duplicate mac, and the
7103     # creation job will fail.
7104     for nic in self.nics:
7105       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7106         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7107
7108     #### allocator run
7109
7110     if self.op.iallocator is not None:
7111       self._RunAllocator()
7112
7113     #### node related checks
7114
7115     # check primary node
7116     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7117     assert self.pnode is not None, \
7118       "Cannot retrieve locked node %s" % self.op.pnode
7119     if pnode.offline:
7120       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7121                                  pnode.name, errors.ECODE_STATE)
7122     if pnode.drained:
7123       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7124                                  pnode.name, errors.ECODE_STATE)
7125
7126     self.secondaries = []
7127
7128     # mirror node verification
7129     if self.op.disk_template in constants.DTS_NET_MIRROR:
7130       if self.op.snode == pnode.name:
7131         raise errors.OpPrereqError("The secondary node cannot be the"
7132                                    " primary node.", errors.ECODE_INVAL)
7133       _CheckNodeOnline(self, self.op.snode)
7134       _CheckNodeNotDrained(self, self.op.snode)
7135       self.secondaries.append(self.op.snode)
7136
7137     nodenames = [pnode.name] + self.secondaries
7138
7139     req_size = _ComputeDiskSize(self.op.disk_template,
7140                                 self.disks)
7141
7142     # Check lv size requirements, if not adopting
7143     if req_size is not None and not self.adopt_disks:
7144       _CheckNodesFreeDisk(self, nodenames, req_size)
7145
7146     if self.adopt_disks: # instead, we must check the adoption data
7147       all_lvs = set([i["adopt"] for i in self.disks])
7148       if len(all_lvs) != len(self.disks):
7149         raise errors.OpPrereqError("Duplicate volume names given for adoption",
7150                                    errors.ECODE_INVAL)
7151       for lv_name in all_lvs:
7152         try:
7153           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7154         except errors.ReservationError:
7155           raise errors.OpPrereqError("LV named %s used by another instance" %
7156                                      lv_name, errors.ECODE_NOTUNIQUE)
7157
7158       node_lvs = self.rpc.call_lv_list([pnode.name],
7159                                        self.cfg.GetVGName())[pnode.name]
7160       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7161       node_lvs = node_lvs.payload
7162       delta = all_lvs.difference(node_lvs.keys())
7163       if delta:
7164         raise errors.OpPrereqError("Missing logical volume(s): %s" %
7165                                    utils.CommaJoin(delta),
7166                                    errors.ECODE_INVAL)
7167       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7168       if online_lvs:
7169         raise errors.OpPrereqError("Online logical volumes found, cannot"
7170                                    " adopt: %s" % utils.CommaJoin(online_lvs),
7171                                    errors.ECODE_STATE)
7172       # update the size of disk based on what is found
7173       for dsk in self.disks:
7174         dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
7175
7176     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7177
7178     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7179     # check OS parameters (remotely)
7180     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7181
7182     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7183
7184     # memory check on primary node
7185     if self.op.start:
7186       _CheckNodeFreeMemory(self, self.pnode.name,
7187                            "creating instance %s" % self.op.instance_name,
7188                            self.be_full[constants.BE_MEMORY],
7189                            self.op.hypervisor)
7190
7191     self.dry_run_result = list(nodenames)
7192
7193   def Exec(self, feedback_fn):
7194     """Create and add the instance to the cluster.
7195
7196     """
7197     instance = self.op.instance_name
7198     pnode_name = self.pnode.name
7199
7200     ht_kind = self.op.hypervisor
7201     if ht_kind in constants.HTS_REQ_PORT:
7202       network_port = self.cfg.AllocatePort()
7203     else:
7204       network_port = None
7205
7206     if constants.ENABLE_FILE_STORAGE:
7207       # this is needed because os.path.join does not accept None arguments
7208       if self.op.file_storage_dir is None:
7209         string_file_storage_dir = ""
7210       else:
7211         string_file_storage_dir = self.op.file_storage_dir
7212
7213       # build the full file storage dir path
7214       file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7215                                         string_file_storage_dir, instance)
7216     else:
7217       file_storage_dir = ""
7218
7219     disks = _GenerateDiskTemplate(self,
7220                                   self.op.disk_template,
7221                                   instance, pnode_name,
7222                                   self.secondaries,
7223                                   self.disks,
7224                                   file_storage_dir,
7225                                   self.op.file_driver,
7226                                   0)
7227
7228     iobj = objects.Instance(name=instance, os=self.op.os_type,
7229                             primary_node=pnode_name,
7230                             nics=self.nics, disks=disks,
7231                             disk_template=self.op.disk_template,
7232                             admin_up=False,
7233                             network_port=network_port,
7234                             beparams=self.op.beparams,
7235                             hvparams=self.op.hvparams,
7236                             hypervisor=self.op.hypervisor,
7237                             osparams=self.op.osparams,
7238                             )
7239
7240     if self.adopt_disks:
7241       # rename LVs to the newly-generated names; we need to construct
7242       # 'fake' LV disks with the old data, plus the new unique_id
7243       tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7244       rename_to = []
7245       for t_dsk, a_dsk in zip (tmp_disks, self.disks):
7246         rename_to.append(t_dsk.logical_id)
7247         t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7248         self.cfg.SetDiskID(t_dsk, pnode_name)
7249       result = self.rpc.call_blockdev_rename(pnode_name,
7250                                              zip(tmp_disks, rename_to))
7251       result.Raise("Failed to rename adoped LVs")
7252     else:
7253       feedback_fn("* creating instance disks...")
7254       try:
7255         _CreateDisks(self, iobj)
7256       except errors.OpExecError:
7257         self.LogWarning("Device creation failed, reverting...")
7258         try:
7259           _RemoveDisks(self, iobj)
7260         finally:
7261           self.cfg.ReleaseDRBDMinors(instance)
7262           raise
7263
7264     feedback_fn("adding instance %s to cluster config" % instance)
7265
7266     self.cfg.AddInstance(iobj, self.proc.GetECId())
7267
7268     # Declare that we don't want to remove the instance lock anymore, as we've
7269     # added the instance to the config
7270     del self.remove_locks[locking.LEVEL_INSTANCE]
7271     # Unlock all the nodes
7272     if self.op.mode == constants.INSTANCE_IMPORT:
7273       nodes_keep = [self.op.src_node]
7274       nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7275                        if node != self.op.src_node]
7276       self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7277       self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7278     else:
7279       self.context.glm.release(locking.LEVEL_NODE)
7280       del self.acquired_locks[locking.LEVEL_NODE]
7281
7282     if self.op.wait_for_sync:
7283       disk_abort = not _WaitForSync(self, iobj)
7284     elif iobj.disk_template in constants.DTS_NET_MIRROR:
7285       # make sure the disks are not degraded (still sync-ing is ok)
7286       time.sleep(15)
7287       feedback_fn("* checking mirrors status")
7288       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7289     else:
7290       disk_abort = False
7291
7292     if disk_abort:
7293       _RemoveDisks(self, iobj)
7294       self.cfg.RemoveInstance(iobj.name)
7295       # Make sure the instance lock gets removed
7296       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7297       raise errors.OpExecError("There are some degraded disks for"
7298                                " this instance")
7299
7300     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7301       if self.op.mode == constants.INSTANCE_CREATE:
7302         if not self.op.no_install:
7303           feedback_fn("* running the instance OS create scripts...")
7304           # FIXME: pass debug option from opcode to backend
7305           result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7306                                                  self.op.debug_level)
7307           result.Raise("Could not add os for instance %s"
7308                        " on node %s" % (instance, pnode_name))
7309
7310       elif self.op.mode == constants.INSTANCE_IMPORT:
7311         feedback_fn("* running the instance OS import scripts...")
7312
7313         transfers = []
7314
7315         for idx, image in enumerate(self.src_images):
7316           if not image:
7317             continue
7318
7319           # FIXME: pass debug option from opcode to backend
7320           dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7321                                              constants.IEIO_FILE, (image, ),
7322                                              constants.IEIO_SCRIPT,
7323                                              (iobj.disks[idx], idx),
7324                                              None)
7325           transfers.append(dt)
7326
7327         import_result = \
7328           masterd.instance.TransferInstanceData(self, feedback_fn,
7329                                                 self.op.src_node, pnode_name,
7330                                                 self.pnode.secondary_ip,
7331                                                 iobj, transfers)
7332         if not compat.all(import_result):
7333           self.LogWarning("Some disks for instance %s on node %s were not"
7334                           " imported successfully" % (instance, pnode_name))
7335
7336       elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7337         feedback_fn("* preparing remote import...")
7338         connect_timeout = constants.RIE_CONNECT_TIMEOUT
7339         timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7340
7341         disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
7342                                                      self.source_x509_ca,
7343                                                      self._cds, timeouts)
7344         if not compat.all(disk_results):
7345           # TODO: Should the instance still be started, even if some disks
7346           # failed to import (valid for local imports, too)?
7347           self.LogWarning("Some disks for instance %s on node %s were not"
7348                           " imported successfully" % (instance, pnode_name))
7349
7350         # Run rename script on newly imported instance
7351         assert iobj.name == instance
7352         feedback_fn("Running rename script for %s" % instance)
7353         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7354                                                    self.source_instance_name,
7355                                                    self.op.debug_level)
7356         if result.fail_msg:
7357           self.LogWarning("Failed to run rename script for %s on node"
7358                           " %s: %s" % (instance, pnode_name, result.fail_msg))
7359
7360       else:
7361         # also checked in the prereq part
7362         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7363                                      % self.op.mode)
7364
7365     if self.op.start:
7366       iobj.admin_up = True
7367       self.cfg.Update(iobj, feedback_fn)
7368       logging.info("Starting instance %s on node %s", instance, pnode_name)
7369       feedback_fn("* starting instance...")
7370       result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7371       result.Raise("Could not start instance")
7372
7373     return list(iobj.all_nodes)
7374
7375
7376 class LUConnectConsole(NoHooksLU):
7377   """Connect to an instance's console.
7378
7379   This is somewhat special in that it returns the command line that
7380   you need to run on the master node in order to connect to the
7381   console.
7382
7383   """
7384   _OP_PARAMS = [
7385     _PInstanceName
7386     ]
7387   REQ_BGL = False
7388
7389   def ExpandNames(self):
7390     self._ExpandAndLockInstance()
7391
7392   def CheckPrereq(self):
7393     """Check prerequisites.
7394
7395     This checks that the instance is in the cluster.
7396
7397     """
7398     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7399     assert self.instance is not None, \
7400       "Cannot retrieve locked instance %s" % self.op.instance_name
7401     _CheckNodeOnline(self, self.instance.primary_node)
7402
7403   def Exec(self, feedback_fn):
7404     """Connect to the console of an instance
7405
7406     """
7407     instance = self.instance
7408     node = instance.primary_node
7409
7410     node_insts = self.rpc.call_instance_list([node],
7411                                              [instance.hypervisor])[node]
7412     node_insts.Raise("Can't get node information from %s" % node)
7413
7414     if instance.name not in node_insts.payload:
7415       if instance.admin_up:
7416         state = "ERROR_down"
7417       else:
7418         state = "ADMIN_down"
7419       raise errors.OpExecError("Instance %s is not running (state %s)" %
7420                                (instance.name, state))
7421
7422     logging.debug("Connecting to console of %s on %s", instance.name, node)
7423
7424     hyper = hypervisor.GetHypervisor(instance.hypervisor)
7425     cluster = self.cfg.GetClusterInfo()
7426     # beparams and hvparams are passed separately, to avoid editing the
7427     # instance and then saving the defaults in the instance itself.
7428     hvparams = cluster.FillHV(instance)
7429     beparams = cluster.FillBE(instance)
7430     console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
7431
7432     # build ssh cmdline
7433     return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
7434
7435
7436 class LUReplaceDisks(LogicalUnit):
7437   """Replace the disks of an instance.
7438
7439   """
7440   HPATH = "mirrors-replace"
7441   HTYPE = constants.HTYPE_INSTANCE
7442   _OP_PARAMS = [
7443     _PInstanceName,
7444     ("mode", ht.NoDefault, ht.TElemOf(constants.REPLACE_MODES)),
7445     ("disks", ht.EmptyList, ht.TListOf(ht.TPositiveInt)),
7446     ("remote_node", None, ht.TMaybeString),
7447     ("iallocator", None, ht.TMaybeString),
7448     ("early_release", False, ht.TBool),
7449     ]
7450   REQ_BGL = False
7451
7452   def CheckArguments(self):
7453     TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7454                                   self.op.iallocator)
7455
7456   def ExpandNames(self):
7457     self._ExpandAndLockInstance()
7458
7459     if self.op.iallocator is not None:
7460       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7461
7462     elif self.op.remote_node is not None:
7463       remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7464       self.op.remote_node = remote_node
7465
7466       # Warning: do not remove the locking of the new secondary here
7467       # unless DRBD8.AddChildren is changed to work in parallel;
7468       # currently it doesn't since parallel invocations of
7469       # FindUnusedMinor will conflict
7470       self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7471       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7472
7473     else:
7474       self.needed_locks[locking.LEVEL_NODE] = []
7475       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7476
7477     self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7478                                    self.op.iallocator, self.op.remote_node,
7479                                    self.op.disks, False, self.op.early_release)
7480
7481     self.tasklets = [self.replacer]
7482
7483   def DeclareLocks(self, level):
7484     # If we're not already locking all nodes in the set we have to declare the
7485     # instance's primary/secondary nodes.
7486     if (level == locking.LEVEL_NODE and
7487         self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7488       self._LockInstancesNodes()
7489
7490   def BuildHooksEnv(self):
7491     """Build hooks env.
7492
7493     This runs on the master, the primary and all the secondaries.
7494
7495     """
7496     instance = self.replacer.instance
7497     env = {
7498       "MODE": self.op.mode,
7499       "NEW_SECONDARY": self.op.remote_node,
7500       "OLD_SECONDARY": instance.secondary_nodes[0],
7501       }
7502     env.update(_BuildInstanceHookEnvByObject(self, instance))
7503     nl = [
7504       self.cfg.GetMasterNode(),
7505       instance.primary_node,
7506       ]
7507     if self.op.remote_node is not None:
7508       nl.append(self.op.remote_node)
7509     return env, nl, nl
7510
7511
7512 class TLReplaceDisks(Tasklet):
7513   """Replaces disks for an instance.
7514
7515   Note: Locking is not within the scope of this class.
7516
7517   """
7518   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7519                disks, delay_iallocator, early_release):
7520     """Initializes this class.
7521
7522     """
7523     Tasklet.__init__(self, lu)
7524
7525     # Parameters
7526     self.instance_name = instance_name
7527     self.mode = mode
7528     self.iallocator_name = iallocator_name
7529     self.remote_node = remote_node
7530     self.disks = disks
7531     self.delay_iallocator = delay_iallocator
7532     self.early_release = early_release
7533
7534     # Runtime data
7535     self.instance = None
7536     self.new_node = None
7537     self.target_node = None
7538     self.other_node = None
7539     self.remote_node_info = None
7540     self.node_secondary_ip = None
7541
7542   @staticmethod
7543   def CheckArguments(mode, remote_node, iallocator):
7544     """Helper function for users of this class.
7545
7546     """
7547     # check for valid parameter combination
7548     if mode == constants.REPLACE_DISK_CHG:
7549       if remote_node is None and iallocator is None:
7550         raise errors.OpPrereqError("When changing the secondary either an"
7551                                    " iallocator script must be used or the"
7552                                    " new node given", errors.ECODE_INVAL)
7553
7554       if remote_node is not None and iallocator is not None:
7555         raise errors.OpPrereqError("Give either the iallocator or the new"
7556                                    " secondary, not both", errors.ECODE_INVAL)
7557
7558     elif remote_node is not None or iallocator is not None:
7559       # Not replacing the secondary
7560       raise errors.OpPrereqError("The iallocator and new node options can"
7561                                  " only be used when changing the"
7562                                  " secondary node", errors.ECODE_INVAL)
7563
7564   @staticmethod
7565   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7566     """Compute a new secondary node using an IAllocator.
7567
7568     """
7569     ial = IAllocator(lu.cfg, lu.rpc,
7570                      mode=constants.IALLOCATOR_MODE_RELOC,
7571                      name=instance_name,
7572                      relocate_from=relocate_from)
7573
7574     ial.Run(iallocator_name)
7575
7576     if not ial.success:
7577       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7578                                  " %s" % (iallocator_name, ial.info),
7579                                  errors.ECODE_NORES)
7580
7581     if len(ial.result) != ial.required_nodes:
7582       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7583                                  " of nodes (%s), required %s" %
7584                                  (iallocator_name,
7585                                   len(ial.result), ial.required_nodes),
7586                                  errors.ECODE_FAULT)
7587
7588     remote_node_name = ial.result[0]
7589
7590     lu.LogInfo("Selected new secondary for instance '%s': %s",
7591                instance_name, remote_node_name)
7592
7593     return remote_node_name
7594
7595   def _FindFaultyDisks(self, node_name):
7596     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7597                                     node_name, True)
7598
7599   def CheckPrereq(self):
7600     """Check prerequisites.
7601
7602     This checks that the instance is in the cluster.
7603
7604     """
7605     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7606     assert instance is not None, \
7607       "Cannot retrieve locked instance %s" % self.instance_name
7608
7609     if instance.disk_template != constants.DT_DRBD8:
7610       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7611                                  " instances", errors.ECODE_INVAL)
7612
7613     if len(instance.secondary_nodes) != 1:
7614       raise errors.OpPrereqError("The instance has a strange layout,"
7615                                  " expected one secondary but found %d" %
7616                                  len(instance.secondary_nodes),
7617                                  errors.ECODE_FAULT)
7618
7619     if not self.delay_iallocator:
7620       self._CheckPrereq2()
7621
7622   def _CheckPrereq2(self):
7623     """Check prerequisites, second part.
7624
7625     This function should always be part of CheckPrereq. It was separated and is
7626     now called from Exec because during node evacuation iallocator was only
7627     called with an unmodified cluster model, not taking planned changes into
7628     account.
7629
7630     """
7631     instance = self.instance
7632     secondary_node = instance.secondary_nodes[0]
7633
7634     if self.iallocator_name is None:
7635       remote_node = self.remote_node
7636     else:
7637       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7638                                        instance.name, instance.secondary_nodes)
7639
7640     if remote_node is not None:
7641       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7642       assert self.remote_node_info is not None, \
7643         "Cannot retrieve locked node %s" % remote_node
7644     else:
7645       self.remote_node_info = None
7646
7647     if remote_node == self.instance.primary_node:
7648       raise errors.OpPrereqError("The specified node is the primary node of"
7649                                  " the instance.", errors.ECODE_INVAL)
7650
7651     if remote_node == secondary_node:
7652       raise errors.OpPrereqError("The specified node is already the"
7653                                  " secondary node of the instance.",
7654                                  errors.ECODE_INVAL)
7655
7656     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7657                                     constants.REPLACE_DISK_CHG):
7658       raise errors.OpPrereqError("Cannot specify disks to be replaced",
7659                                  errors.ECODE_INVAL)
7660
7661     if self.mode == constants.REPLACE_DISK_AUTO:
7662       faulty_primary = self._FindFaultyDisks(instance.primary_node)
7663       faulty_secondary = self._FindFaultyDisks(secondary_node)
7664
7665       if faulty_primary and faulty_secondary:
7666         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7667                                    " one node and can not be repaired"
7668                                    " automatically" % self.instance_name,
7669                                    errors.ECODE_STATE)
7670
7671       if faulty_primary:
7672         self.disks = faulty_primary
7673         self.target_node = instance.primary_node
7674         self.other_node = secondary_node
7675         check_nodes = [self.target_node, self.other_node]
7676       elif faulty_secondary:
7677         self.disks = faulty_secondary
7678         self.target_node = secondary_node
7679         self.other_node = instance.primary_node
7680         check_nodes = [self.target_node, self.other_node]
7681       else:
7682         self.disks = []
7683         check_nodes = []
7684
7685     else:
7686       # Non-automatic modes
7687       if self.mode == constants.REPLACE_DISK_PRI:
7688         self.target_node = instance.primary_node
7689         self.other_node = secondary_node
7690         check_nodes = [self.target_node, self.other_node]
7691
7692       elif self.mode == constants.REPLACE_DISK_SEC:
7693         self.target_node = secondary_node
7694         self.other_node = instance.primary_node
7695         check_nodes = [self.target_node, self.other_node]
7696
7697       elif self.mode == constants.REPLACE_DISK_CHG:
7698         self.new_node = remote_node
7699         self.other_node = instance.primary_node
7700         self.target_node = secondary_node
7701         check_nodes = [self.new_node, self.other_node]
7702
7703         _CheckNodeNotDrained(self.lu, remote_node)
7704
7705         old_node_info = self.cfg.GetNodeInfo(secondary_node)
7706         assert old_node_info is not None
7707         if old_node_info.offline and not self.early_release:
7708           # doesn't make sense to delay the release
7709           self.early_release = True
7710           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7711                           " early-release mode", secondary_node)
7712
7713       else:
7714         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7715                                      self.mode)
7716
7717       # If not specified all disks should be replaced
7718       if not self.disks:
7719         self.disks = range(len(self.instance.disks))
7720
7721     for node in check_nodes:
7722       _CheckNodeOnline(self.lu, node)
7723
7724     # Check whether disks are valid
7725     for disk_idx in self.disks:
7726       instance.FindDisk(disk_idx)
7727
7728     # Get secondary node IP addresses
7729     node_2nd_ip = {}
7730
7731     for node_name in [self.target_node, self.other_node, self.new_node]:
7732       if node_name is not None:
7733         node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7734
7735     self.node_secondary_ip = node_2nd_ip
7736
7737   def Exec(self, feedback_fn):
7738     """Execute disk replacement.
7739
7740     This dispatches the disk replacement to the appropriate handler.
7741
7742     """
7743     if self.delay_iallocator:
7744       self._CheckPrereq2()
7745
7746     if not self.disks:
7747       feedback_fn("No disks need replacement")
7748       return
7749
7750     feedback_fn("Replacing disk(s) %s for %s" %
7751                 (utils.CommaJoin(self.disks), self.instance.name))
7752
7753     activate_disks = (not self.instance.admin_up)
7754
7755     # Activate the instance disks if we're replacing them on a down instance
7756     if activate_disks:
7757       _StartInstanceDisks(self.lu, self.instance, True)
7758
7759     try:
7760       # Should we replace the secondary node?
7761       if self.new_node is not None:
7762         fn = self._ExecDrbd8Secondary
7763       else:
7764         fn = self._ExecDrbd8DiskOnly
7765
7766       return fn(feedback_fn)
7767
7768     finally:
7769       # Deactivate the instance disks if we're replacing them on a
7770       # down instance
7771       if activate_disks:
7772         _SafeShutdownInstanceDisks(self.lu, self.instance)
7773
7774   def _CheckVolumeGroup(self, nodes):
7775     self.lu.LogInfo("Checking volume groups")
7776
7777     vgname = self.cfg.GetVGName()
7778
7779     # Make sure volume group exists on all involved nodes
7780     results = self.rpc.call_vg_list(nodes)
7781     if not results:
7782       raise errors.OpExecError("Can't list volume groups on the nodes")
7783
7784     for node in nodes:
7785       res = results[node]
7786       res.Raise("Error checking node %s" % node)
7787       if vgname not in res.payload:
7788         raise errors.OpExecError("Volume group '%s' not found on node %s" %
7789                                  (vgname, node))
7790
7791   def _CheckDisksExistence(self, nodes):
7792     # Check disk existence
7793     for idx, dev in enumerate(self.instance.disks):
7794       if idx not in self.disks:
7795         continue
7796
7797       for node in nodes:
7798         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7799         self.cfg.SetDiskID(dev, node)
7800
7801         result = self.rpc.call_blockdev_find(node, dev)
7802
7803         msg = result.fail_msg
7804         if msg or not result.payload:
7805           if not msg:
7806             msg = "disk not found"
7807           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7808                                    (idx, node, msg))
7809
7810   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7811     for idx, dev in enumerate(self.instance.disks):
7812       if idx not in self.disks:
7813         continue
7814
7815       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7816                       (idx, node_name))
7817
7818       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7819                                    ldisk=ldisk):
7820         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7821                                  " replace disks for instance %s" %
7822                                  (node_name, self.instance.name))
7823
7824   def _CreateNewStorage(self, node_name):
7825     vgname = self.cfg.GetVGName()
7826     iv_names = {}
7827
7828     for idx, dev in enumerate(self.instance.disks):
7829       if idx not in self.disks:
7830         continue
7831
7832       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7833
7834       self.cfg.SetDiskID(dev, node_name)
7835
7836       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7837       names = _GenerateUniqueNames(self.lu, lv_names)
7838
7839       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7840                              logical_id=(vgname, names[0]))
7841       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7842                              logical_id=(vgname, names[1]))
7843
7844       new_lvs = [lv_data, lv_meta]
7845       old_lvs = dev.children
7846       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7847
7848       # we pass force_create=True to force the LVM creation
7849       for new_lv in new_lvs:
7850         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7851                         _GetInstanceInfoText(self.instance), False)
7852
7853     return iv_names
7854
7855   def _CheckDevices(self, node_name, iv_names):
7856     for name, (dev, _, _) in iv_names.iteritems():
7857       self.cfg.SetDiskID(dev, node_name)
7858
7859       result = self.rpc.call_blockdev_find(node_name, dev)
7860
7861       msg = result.fail_msg
7862       if msg or not result.payload:
7863         if not msg:
7864           msg = "disk not found"
7865         raise errors.OpExecError("Can't find DRBD device %s: %s" %
7866                                  (name, msg))
7867
7868       if result.payload.is_degraded:
7869         raise errors.OpExecError("DRBD device %s is degraded!" % name)
7870
7871   def _RemoveOldStorage(self, node_name, iv_names):
7872     for name, (_, old_lvs, _) in iv_names.iteritems():
7873       self.lu.LogInfo("Remove logical volumes for %s" % name)
7874
7875       for lv in old_lvs:
7876         self.cfg.SetDiskID(lv, node_name)
7877
7878         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7879         if msg:
7880           self.lu.LogWarning("Can't remove old LV: %s" % msg,
7881                              hint="remove unused LVs manually")
7882
7883   def _ReleaseNodeLock(self, node_name):
7884     """Releases the lock for a given node."""
7885     self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7886
7887   def _ExecDrbd8DiskOnly(self, feedback_fn):
7888     """Replace a disk on the primary or secondary for DRBD 8.
7889
7890     The algorithm for replace is quite complicated:
7891
7892       1. for each disk to be replaced:
7893
7894         1. create new LVs on the target node with unique names
7895         1. detach old LVs from the drbd device
7896         1. rename old LVs to name_replaced.<time_t>
7897         1. rename new LVs to old LVs
7898         1. attach the new LVs (with the old names now) to the drbd device
7899
7900       1. wait for sync across all devices
7901
7902       1. for each modified disk:
7903
7904         1. remove old LVs (which have the name name_replaces.<time_t>)
7905
7906     Failures are not very well handled.
7907
7908     """
7909     steps_total = 6
7910
7911     # Step: check device activation
7912     self.lu.LogStep(1, steps_total, "Check device existence")
7913     self._CheckDisksExistence([self.other_node, self.target_node])
7914     self._CheckVolumeGroup([self.target_node, self.other_node])
7915
7916     # Step: check other node consistency
7917     self.lu.LogStep(2, steps_total, "Check peer consistency")
7918     self._CheckDisksConsistency(self.other_node,
7919                                 self.other_node == self.instance.primary_node,
7920                                 False)
7921
7922     # Step: create new storage
7923     self.lu.LogStep(3, steps_total, "Allocate new storage")
7924     iv_names = self._CreateNewStorage(self.target_node)
7925
7926     # Step: for each lv, detach+rename*2+attach
7927     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7928     for dev, old_lvs, new_lvs in iv_names.itervalues():
7929       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7930
7931       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7932                                                      old_lvs)
7933       result.Raise("Can't detach drbd from local storage on node"
7934                    " %s for device %s" % (self.target_node, dev.iv_name))
7935       #dev.children = []
7936       #cfg.Update(instance)
7937
7938       # ok, we created the new LVs, so now we know we have the needed
7939       # storage; as such, we proceed on the target node to rename
7940       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7941       # using the assumption that logical_id == physical_id (which in
7942       # turn is the unique_id on that node)
7943
7944       # FIXME(iustin): use a better name for the replaced LVs
7945       temp_suffix = int(time.time())
7946       ren_fn = lambda d, suff: (d.physical_id[0],
7947                                 d.physical_id[1] + "_replaced-%s" % suff)
7948
7949       # Build the rename list based on what LVs exist on the node
7950       rename_old_to_new = []
7951       for to_ren in old_lvs:
7952         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7953         if not result.fail_msg and result.payload:
7954           # device exists
7955           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7956
7957       self.lu.LogInfo("Renaming the old LVs on the target node")
7958       result = self.rpc.call_blockdev_rename(self.target_node,
7959                                              rename_old_to_new)
7960       result.Raise("Can't rename old LVs on node %s" % self.target_node)
7961
7962       # Now we rename the new LVs to the old LVs
7963       self.lu.LogInfo("Renaming the new LVs on the target node")
7964       rename_new_to_old = [(new, old.physical_id)
7965                            for old, new in zip(old_lvs, new_lvs)]
7966       result = self.rpc.call_blockdev_rename(self.target_node,
7967                                              rename_new_to_old)
7968       result.Raise("Can't rename new LVs on node %s" % self.target_node)
7969
7970       for old, new in zip(old_lvs, new_lvs):
7971         new.logical_id = old.logical_id
7972         self.cfg.SetDiskID(new, self.target_node)
7973
7974       for disk in old_lvs:
7975         disk.logical_id = ren_fn(disk, temp_suffix)
7976         self.cfg.SetDiskID(disk, self.target_node)
7977
7978       # Now that the new lvs have the old name, we can add them to the device
7979       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7980       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7981                                                   new_lvs)
7982       msg = result.fail_msg
7983       if msg:
7984         for new_lv in new_lvs:
7985           msg2 = self.rpc.call_blockdev_remove(self.target_node,
7986                                                new_lv).fail_msg
7987           if msg2:
7988             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7989                                hint=("cleanup manually the unused logical"
7990                                      "volumes"))
7991         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7992
7993       dev.children = new_lvs
7994
7995       self.cfg.Update(self.instance, feedback_fn)
7996
7997     cstep = 5
7998     if self.early_release:
7999       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8000       cstep += 1
8001       self._RemoveOldStorage(self.target_node, iv_names)
8002       # WARNING: we release both node locks here, do not do other RPCs
8003       # than WaitForSync to the primary node
8004       self._ReleaseNodeLock([self.target_node, self.other_node])
8005
8006     # Wait for sync
8007     # This can fail as the old devices are degraded and _WaitForSync
8008     # does a combined result over all disks, so we don't check its return value
8009     self.lu.LogStep(cstep, steps_total, "Sync devices")
8010     cstep += 1
8011     _WaitForSync(self.lu, self.instance)
8012
8013     # Check all devices manually
8014     self._CheckDevices(self.instance.primary_node, iv_names)
8015
8016     # Step: remove old storage
8017     if not self.early_release:
8018       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8019       cstep += 1
8020       self._RemoveOldStorage(self.target_node, iv_names)
8021
8022   def _ExecDrbd8Secondary(self, feedback_fn):
8023     """Replace the secondary node for DRBD 8.
8024
8025     The algorithm for replace is quite complicated:
8026       - for all disks of the instance:
8027         - create new LVs on the new node with same names
8028         - shutdown the drbd device on the old secondary
8029         - disconnect the drbd network on the primary
8030         - create the drbd device on the new secondary
8031         - network attach the drbd on the primary, using an artifice:
8032           the drbd code for Attach() will connect to the network if it
8033           finds a device which is connected to the good local disks but
8034           not network enabled
8035       - wait for sync across all devices
8036       - remove all disks from the old secondary
8037
8038     Failures are not very well handled.
8039
8040     """
8041     steps_total = 6
8042
8043     # Step: check device activation
8044     self.lu.LogStep(1, steps_total, "Check device existence")
8045     self._CheckDisksExistence([self.instance.primary_node])
8046     self._CheckVolumeGroup([self.instance.primary_node])
8047
8048     # Step: check other node consistency
8049     self.lu.LogStep(2, steps_total, "Check peer consistency")
8050     self._CheckDisksConsistency(self.instance.primary_node, True, True)
8051
8052     # Step: create new storage
8053     self.lu.LogStep(3, steps_total, "Allocate new storage")
8054     for idx, dev in enumerate(self.instance.disks):
8055       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8056                       (self.new_node, idx))
8057       # we pass force_create=True to force LVM creation
8058       for new_lv in dev.children:
8059         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8060                         _GetInstanceInfoText(self.instance), False)
8061
8062     # Step 4: dbrd minors and drbd setups changes
8063     # after this, we must manually remove the drbd minors on both the
8064     # error and the success paths
8065     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8066     minors = self.cfg.AllocateDRBDMinor([self.new_node
8067                                          for dev in self.instance.disks],
8068                                         self.instance.name)
8069     logging.debug("Allocated minors %r", minors)
8070
8071     iv_names = {}
8072     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8073       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8074                       (self.new_node, idx))
8075       # create new devices on new_node; note that we create two IDs:
8076       # one without port, so the drbd will be activated without
8077       # networking information on the new node at this stage, and one
8078       # with network, for the latter activation in step 4
8079       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8080       if self.instance.primary_node == o_node1:
8081         p_minor = o_minor1
8082       else:
8083         assert self.instance.primary_node == o_node2, "Three-node instance?"
8084         p_minor = o_minor2
8085
8086       new_alone_id = (self.instance.primary_node, self.new_node, None,
8087                       p_minor, new_minor, o_secret)
8088       new_net_id = (self.instance.primary_node, self.new_node, o_port,
8089                     p_minor, new_minor, o_secret)
8090
8091       iv_names[idx] = (dev, dev.children, new_net_id)
8092       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8093                     new_net_id)
8094       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8095                               logical_id=new_alone_id,
8096                               children=dev.children,
8097                               size=dev.size)
8098       try:
8099         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8100                               _GetInstanceInfoText(self.instance), False)
8101       except errors.GenericError:
8102         self.cfg.ReleaseDRBDMinors(self.instance.name)
8103         raise
8104
8105     # We have new devices, shutdown the drbd on the old secondary
8106     for idx, dev in enumerate(self.instance.disks):
8107       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8108       self.cfg.SetDiskID(dev, self.target_node)
8109       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8110       if msg:
8111         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8112                            "node: %s" % (idx, msg),
8113                            hint=("Please cleanup this device manually as"
8114                                  " soon as possible"))
8115
8116     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8117     result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8118                                                self.node_secondary_ip,
8119                                                self.instance.disks)\
8120                                               [self.instance.primary_node]
8121
8122     msg = result.fail_msg
8123     if msg:
8124       # detaches didn't succeed (unlikely)
8125       self.cfg.ReleaseDRBDMinors(self.instance.name)
8126       raise errors.OpExecError("Can't detach the disks from the network on"
8127                                " old node: %s" % (msg,))
8128
8129     # if we managed to detach at least one, we update all the disks of
8130     # the instance to point to the new secondary
8131     self.lu.LogInfo("Updating instance configuration")
8132     for dev, _, new_logical_id in iv_names.itervalues():
8133       dev.logical_id = new_logical_id
8134       self.cfg.SetDiskID(dev, self.instance.primary_node)
8135
8136     self.cfg.Update(self.instance, feedback_fn)
8137
8138     # and now perform the drbd attach
8139     self.lu.LogInfo("Attaching primary drbds to new secondary"
8140                     " (standalone => connected)")
8141     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8142                                             self.new_node],
8143                                            self.node_secondary_ip,
8144                                            self.instance.disks,
8145                                            self.instance.name,
8146                                            False)
8147     for to_node, to_result in result.items():
8148       msg = to_result.fail_msg
8149       if msg:
8150         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8151                            to_node, msg,
8152                            hint=("please do a gnt-instance info to see the"
8153                                  " status of disks"))
8154     cstep = 5
8155     if self.early_release:
8156       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8157       cstep += 1
8158       self._RemoveOldStorage(self.target_node, iv_names)
8159       # WARNING: we release all node locks here, do not do other RPCs
8160       # than WaitForSync to the primary node
8161       self._ReleaseNodeLock([self.instance.primary_node,
8162                              self.target_node,
8163                              self.new_node])
8164
8165     # Wait for sync
8166     # This can fail as the old devices are degraded and _WaitForSync
8167     # does a combined result over all disks, so we don't check its return value
8168     self.lu.LogStep(cstep, steps_total, "Sync devices")
8169     cstep += 1
8170     _WaitForSync(self.lu, self.instance)
8171
8172     # Check all devices manually
8173     self._CheckDevices(self.instance.primary_node, iv_names)
8174
8175     # Step: remove old storage
8176     if not self.early_release:
8177       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8178       self._RemoveOldStorage(self.target_node, iv_names)
8179
8180
8181 class LURepairNodeStorage(NoHooksLU):
8182   """Repairs the volume group on a node.
8183
8184   """
8185   _OP_PARAMS = [
8186     _PNodeName,
8187     ("storage_type", ht.NoDefault, _CheckStorageType),
8188     ("name", ht.NoDefault, ht.TNonEmptyString),
8189     ("ignore_consistency", False, ht.TBool),
8190     ]
8191   REQ_BGL = False
8192
8193   def CheckArguments(self):
8194     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8195
8196     storage_type = self.op.storage_type
8197
8198     if (constants.SO_FIX_CONSISTENCY not in
8199         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8200       raise errors.OpPrereqError("Storage units of type '%s' can not be"
8201                                  " repaired" % storage_type,
8202                                  errors.ECODE_INVAL)
8203
8204   def ExpandNames(self):
8205     self.needed_locks = {
8206       locking.LEVEL_NODE: [self.op.node_name],
8207       }
8208
8209   def _CheckFaultyDisks(self, instance, node_name):
8210     """Ensure faulty disks abort the opcode or at least warn."""
8211     try:
8212       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8213                                   node_name, True):
8214         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8215                                    " node '%s'" % (instance.name, node_name),
8216                                    errors.ECODE_STATE)
8217     except errors.OpPrereqError, err:
8218       if self.op.ignore_consistency:
8219         self.proc.LogWarning(str(err.args[0]))
8220       else:
8221         raise
8222
8223   def CheckPrereq(self):
8224     """Check prerequisites.
8225
8226     """
8227     # Check whether any instance on this node has faulty disks
8228     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8229       if not inst.admin_up:
8230         continue
8231       check_nodes = set(inst.all_nodes)
8232       check_nodes.discard(self.op.node_name)
8233       for inst_node_name in check_nodes:
8234         self._CheckFaultyDisks(inst, inst_node_name)
8235
8236   def Exec(self, feedback_fn):
8237     feedback_fn("Repairing storage unit '%s' on %s ..." %
8238                 (self.op.name, self.op.node_name))
8239
8240     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8241     result = self.rpc.call_storage_execute(self.op.node_name,
8242                                            self.op.storage_type, st_args,
8243                                            self.op.name,
8244                                            constants.SO_FIX_CONSISTENCY)
8245     result.Raise("Failed to repair storage unit '%s' on %s" %
8246                  (self.op.name, self.op.node_name))
8247
8248
8249 class LUNodeEvacuationStrategy(NoHooksLU):
8250   """Computes the node evacuation strategy.
8251
8252   """
8253   _OP_PARAMS = [
8254     ("nodes", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
8255     ("remote_node", None, ht.TMaybeString),
8256     ("iallocator", None, ht.TMaybeString),
8257     ]
8258   REQ_BGL = False
8259
8260   def CheckArguments(self):
8261     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8262
8263   def ExpandNames(self):
8264     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8265     self.needed_locks = locks = {}
8266     if self.op.remote_node is None:
8267       locks[locking.LEVEL_NODE] = locking.ALL_SET
8268     else:
8269       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8270       locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8271
8272   def Exec(self, feedback_fn):
8273     if self.op.remote_node is not None:
8274       instances = []
8275       for node in self.op.nodes:
8276         instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8277       result = []
8278       for i in instances:
8279         if i.primary_node == self.op.remote_node:
8280           raise errors.OpPrereqError("Node %s is the primary node of"
8281                                      " instance %s, cannot use it as"
8282                                      " secondary" %
8283                                      (self.op.remote_node, i.name),
8284                                      errors.ECODE_INVAL)
8285         result.append([i.name, self.op.remote_node])
8286     else:
8287       ial = IAllocator(self.cfg, self.rpc,
8288                        mode=constants.IALLOCATOR_MODE_MEVAC,
8289                        evac_nodes=self.op.nodes)
8290       ial.Run(self.op.iallocator, validate=True)
8291       if not ial.success:
8292         raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8293                                  errors.ECODE_NORES)
8294       result = ial.result
8295     return result
8296
8297
8298 class LUGrowDisk(LogicalUnit):
8299   """Grow a disk of an instance.
8300
8301   """
8302   HPATH = "disk-grow"
8303   HTYPE = constants.HTYPE_INSTANCE
8304   _OP_PARAMS = [
8305     _PInstanceName,
8306     ("disk", ht.NoDefault, ht.TInt),
8307     ("amount", ht.NoDefault, ht.TInt),
8308     ("wait_for_sync", True, ht.TBool),
8309     ]
8310   REQ_BGL = False
8311
8312   def ExpandNames(self):
8313     self._ExpandAndLockInstance()
8314     self.needed_locks[locking.LEVEL_NODE] = []
8315     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8316
8317   def DeclareLocks(self, level):
8318     if level == locking.LEVEL_NODE:
8319       self._LockInstancesNodes()
8320
8321   def BuildHooksEnv(self):
8322     """Build hooks env.
8323
8324     This runs on the master, the primary and all the secondaries.
8325
8326     """
8327     env = {
8328       "DISK": self.op.disk,
8329       "AMOUNT": self.op.amount,
8330       }
8331     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8332     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8333     return env, nl, nl
8334
8335   def CheckPrereq(self):
8336     """Check prerequisites.
8337
8338     This checks that the instance is in the cluster.
8339
8340     """
8341     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8342     assert instance is not None, \
8343       "Cannot retrieve locked instance %s" % self.op.instance_name
8344     nodenames = list(instance.all_nodes)
8345     for node in nodenames:
8346       _CheckNodeOnline(self, node)
8347
8348     self.instance = instance
8349
8350     if instance.disk_template not in constants.DTS_GROWABLE:
8351       raise errors.OpPrereqError("Instance's disk layout does not support"
8352                                  " growing.", errors.ECODE_INVAL)
8353
8354     self.disk = instance.FindDisk(self.op.disk)
8355
8356     if instance.disk_template != constants.DT_FILE:
8357       # TODO: check the free disk space for file, when that feature will be
8358       # supported
8359       _CheckNodesFreeDisk(self, nodenames, self.op.amount)
8360
8361   def Exec(self, feedback_fn):
8362     """Execute disk grow.
8363
8364     """
8365     instance = self.instance
8366     disk = self.disk
8367
8368     disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8369     if not disks_ok:
8370       raise errors.OpExecError("Cannot activate block device to grow")
8371
8372     for node in instance.all_nodes:
8373       self.cfg.SetDiskID(disk, node)
8374       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8375       result.Raise("Grow request failed to node %s" % node)
8376
8377       # TODO: Rewrite code to work properly
8378       # DRBD goes into sync mode for a short amount of time after executing the
8379       # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8380       # calling "resize" in sync mode fails. Sleeping for a short amount of
8381       # time is a work-around.
8382       time.sleep(5)
8383
8384     disk.RecordGrow(self.op.amount)
8385     self.cfg.Update(instance, feedback_fn)
8386     if self.op.wait_for_sync:
8387       disk_abort = not _WaitForSync(self, instance, disks=[disk])
8388       if disk_abort:
8389         self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8390                              " status.\nPlease check the instance.")
8391       if not instance.admin_up:
8392         _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8393     elif not instance.admin_up:
8394       self.proc.LogWarning("Not shutting down the disk even if the instance is"
8395                            " not supposed to be running because no wait for"
8396                            " sync mode was requested.")
8397
8398
8399 class LUQueryInstanceData(NoHooksLU):
8400   """Query runtime instance data.
8401
8402   """
8403   _OP_PARAMS = [
8404     ("instances", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
8405     ("static", False, ht.TBool),
8406     ]
8407   REQ_BGL = False
8408
8409   def ExpandNames(self):
8410     self.needed_locks = {}
8411     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8412
8413     if self.op.instances:
8414       self.wanted_names = []
8415       for name in self.op.instances:
8416         full_name = _ExpandInstanceName(self.cfg, name)
8417         self.wanted_names.append(full_name)
8418       self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8419     else:
8420       self.wanted_names = None
8421       self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8422
8423     self.needed_locks[locking.LEVEL_NODE] = []
8424     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8425
8426   def DeclareLocks(self, level):
8427     if level == locking.LEVEL_NODE:
8428       self._LockInstancesNodes()
8429
8430   def CheckPrereq(self):
8431     """Check prerequisites.
8432
8433     This only checks the optional instance list against the existing names.
8434
8435     """
8436     if self.wanted_names is None:
8437       self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8438
8439     self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8440                              in self.wanted_names]
8441
8442   def _ComputeBlockdevStatus(self, node, instance_name, dev):
8443     """Returns the status of a block device
8444
8445     """
8446     if self.op.static or not node:
8447       return None
8448
8449     self.cfg.SetDiskID(dev, node)
8450
8451     result = self.rpc.call_blockdev_find(node, dev)
8452     if result.offline:
8453       return None
8454
8455     result.Raise("Can't compute disk status for %s" % instance_name)
8456
8457     status = result.payload
8458     if status is None:
8459       return None
8460
8461     return (status.dev_path, status.major, status.minor,
8462             status.sync_percent, status.estimated_time,
8463             status.is_degraded, status.ldisk_status)
8464
8465   def _ComputeDiskStatus(self, instance, snode, dev):
8466     """Compute block device status.
8467
8468     """
8469     if dev.dev_type in constants.LDS_DRBD:
8470       # we change the snode then (otherwise we use the one passed in)
8471       if dev.logical_id[0] == instance.primary_node:
8472         snode = dev.logical_id[1]
8473       else:
8474         snode = dev.logical_id[0]
8475
8476     dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8477                                               instance.name, dev)
8478     dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8479
8480     if dev.children:
8481       dev_children = [self._ComputeDiskStatus(instance, snode, child)
8482                       for child in dev.children]
8483     else:
8484       dev_children = []
8485
8486     data = {
8487       "iv_name": dev.iv_name,
8488       "dev_type": dev.dev_type,
8489       "logical_id": dev.logical_id,
8490       "physical_id": dev.physical_id,
8491       "pstatus": dev_pstatus,
8492       "sstatus": dev_sstatus,
8493       "children": dev_children,
8494       "mode": dev.mode,
8495       "size": dev.size,
8496       }
8497
8498     return data
8499
8500   def Exec(self, feedback_fn):
8501     """Gather and return data"""
8502     result = {}
8503
8504     cluster = self.cfg.GetClusterInfo()
8505
8506     for instance in self.wanted_instances:
8507       if not self.op.static:
8508         remote_info = self.rpc.call_instance_info(instance.primary_node,
8509                                                   instance.name,
8510                                                   instance.hypervisor)
8511         remote_info.Raise("Error checking node %s" % instance.primary_node)
8512         remote_info = remote_info.payload
8513         if remote_info and "state" in remote_info:
8514           remote_state = "up"
8515         else:
8516           remote_state = "down"
8517       else:
8518         remote_state = None
8519       if instance.admin_up:
8520         config_state = "up"
8521       else:
8522         config_state = "down"
8523
8524       disks = [self._ComputeDiskStatus(instance, None, device)
8525                for device in instance.disks]
8526
8527       idict = {
8528         "name": instance.name,
8529         "config_state": config_state,
8530         "run_state": remote_state,
8531         "pnode": instance.primary_node,
8532         "snodes": instance.secondary_nodes,
8533         "os": instance.os,
8534         # this happens to be the same format used for hooks
8535         "nics": _NICListToTuple(self, instance.nics),
8536         "disk_template": instance.disk_template,
8537         "disks": disks,
8538         "hypervisor": instance.hypervisor,
8539         "network_port": instance.network_port,
8540         "hv_instance": instance.hvparams,
8541         "hv_actual": cluster.FillHV(instance, skip_globals=True),
8542         "be_instance": instance.beparams,
8543         "be_actual": cluster.FillBE(instance),
8544         "os_instance": instance.osparams,
8545         "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8546         "serial_no": instance.serial_no,
8547         "mtime": instance.mtime,
8548         "ctime": instance.ctime,
8549         "uuid": instance.uuid,
8550         }
8551
8552       result[instance.name] = idict
8553
8554     return result
8555
8556
8557 class LUSetInstanceParams(LogicalUnit):
8558   """Modifies an instances's parameters.
8559
8560   """
8561   HPATH = "instance-modify"
8562   HTYPE = constants.HTYPE_INSTANCE
8563   _OP_PARAMS = [
8564     _PInstanceName,
8565     ("nics", ht.EmptyList, ht.TList),
8566     ("disks", ht.EmptyList, ht.TList),
8567     ("beparams", ht.EmptyDict, ht.TDict),
8568     ("hvparams", ht.EmptyDict, ht.TDict),
8569     ("disk_template", None, ht.TMaybeString),
8570     ("remote_node", None, ht.TMaybeString),
8571     ("os_name", None, ht.TMaybeString),
8572     ("force_variant", False, ht.TBool),
8573     ("osparams", None, ht.TOr(ht.TDict, ht.TNone)),
8574     _PForce,
8575     ]
8576   REQ_BGL = False
8577
8578   def CheckArguments(self):
8579     if not (self.op.nics or self.op.disks or self.op.disk_template or
8580             self.op.hvparams or self.op.beparams or self.op.os_name):
8581       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8582
8583     if self.op.hvparams:
8584       _CheckGlobalHvParams(self.op.hvparams)
8585
8586     # Disk validation
8587     disk_addremove = 0
8588     for disk_op, disk_dict in self.op.disks:
8589       utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8590       if disk_op == constants.DDM_REMOVE:
8591         disk_addremove += 1
8592         continue
8593       elif disk_op == constants.DDM_ADD:
8594         disk_addremove += 1
8595       else:
8596         if not isinstance(disk_op, int):
8597           raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8598         if not isinstance(disk_dict, dict):
8599           msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8600           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8601
8602       if disk_op == constants.DDM_ADD:
8603         mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8604         if mode not in constants.DISK_ACCESS_SET:
8605           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8606                                      errors.ECODE_INVAL)
8607         size = disk_dict.get('size', None)
8608         if size is None:
8609           raise errors.OpPrereqError("Required disk parameter size missing",
8610                                      errors.ECODE_INVAL)
8611         try:
8612           size = int(size)
8613         except (TypeError, ValueError), err:
8614           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8615                                      str(err), errors.ECODE_INVAL)
8616         disk_dict['size'] = size
8617       else:
8618         # modification of disk
8619         if 'size' in disk_dict:
8620           raise errors.OpPrereqError("Disk size change not possible, use"
8621                                      " grow-disk", errors.ECODE_INVAL)
8622
8623     if disk_addremove > 1:
8624       raise errors.OpPrereqError("Only one disk add or remove operation"
8625                                  " supported at a time", errors.ECODE_INVAL)
8626
8627     if self.op.disks and self.op.disk_template is not None:
8628       raise errors.OpPrereqError("Disk template conversion and other disk"
8629                                  " changes not supported at the same time",
8630                                  errors.ECODE_INVAL)
8631
8632     if self.op.disk_template:
8633       _CheckDiskTemplate(self.op.disk_template)
8634       if (self.op.disk_template in constants.DTS_NET_MIRROR and
8635           self.op.remote_node is None):
8636         raise errors.OpPrereqError("Changing the disk template to a mirrored"
8637                                    " one requires specifying a secondary node",
8638                                    errors.ECODE_INVAL)
8639
8640     # NIC validation
8641     nic_addremove = 0
8642     for nic_op, nic_dict in self.op.nics:
8643       utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8644       if nic_op == constants.DDM_REMOVE:
8645         nic_addremove += 1
8646         continue
8647       elif nic_op == constants.DDM_ADD:
8648         nic_addremove += 1
8649       else:
8650         if not isinstance(nic_op, int):
8651           raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8652         if not isinstance(nic_dict, dict):
8653           msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8654           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8655
8656       # nic_dict should be a dict
8657       nic_ip = nic_dict.get('ip', None)
8658       if nic_ip is not None:
8659         if nic_ip.lower() == constants.VALUE_NONE:
8660           nic_dict['ip'] = None
8661         else:
8662           if not netutils.IPAddress.IsValid(nic_ip):
8663             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8664                                        errors.ECODE_INVAL)
8665
8666       nic_bridge = nic_dict.get('bridge', None)
8667       nic_link = nic_dict.get('link', None)
8668       if nic_bridge and nic_link:
8669         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8670                                    " at the same time", errors.ECODE_INVAL)
8671       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8672         nic_dict['bridge'] = None
8673       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8674         nic_dict['link'] = None
8675
8676       if nic_op == constants.DDM_ADD:
8677         nic_mac = nic_dict.get('mac', None)
8678         if nic_mac is None:
8679           nic_dict['mac'] = constants.VALUE_AUTO
8680
8681       if 'mac' in nic_dict:
8682         nic_mac = nic_dict['mac']
8683         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8684           nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8685
8686         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8687           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8688                                      " modifying an existing nic",
8689                                      errors.ECODE_INVAL)
8690
8691     if nic_addremove > 1:
8692       raise errors.OpPrereqError("Only one NIC add or remove operation"
8693                                  " supported at a time", errors.ECODE_INVAL)
8694
8695   def ExpandNames(self):
8696     self._ExpandAndLockInstance()
8697     self.needed_locks[locking.LEVEL_NODE] = []
8698     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8699
8700   def DeclareLocks(self, level):
8701     if level == locking.LEVEL_NODE:
8702       self._LockInstancesNodes()
8703       if self.op.disk_template and self.op.remote_node:
8704         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8705         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8706
8707   def BuildHooksEnv(self):
8708     """Build hooks env.
8709
8710     This runs on the master, primary and secondaries.
8711
8712     """
8713     args = dict()
8714     if constants.BE_MEMORY in self.be_new:
8715       args['memory'] = self.be_new[constants.BE_MEMORY]
8716     if constants.BE_VCPUS in self.be_new:
8717       args['vcpus'] = self.be_new[constants.BE_VCPUS]
8718     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8719     # information at all.
8720     if self.op.nics:
8721       args['nics'] = []
8722       nic_override = dict(self.op.nics)
8723       for idx, nic in enumerate(self.instance.nics):
8724         if idx in nic_override:
8725           this_nic_override = nic_override[idx]
8726         else:
8727           this_nic_override = {}
8728         if 'ip' in this_nic_override:
8729           ip = this_nic_override['ip']
8730         else:
8731           ip = nic.ip
8732         if 'mac' in this_nic_override:
8733           mac = this_nic_override['mac']
8734         else:
8735           mac = nic.mac
8736         if idx in self.nic_pnew:
8737           nicparams = self.nic_pnew[idx]
8738         else:
8739           nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
8740         mode = nicparams[constants.NIC_MODE]
8741         link = nicparams[constants.NIC_LINK]
8742         args['nics'].append((ip, mac, mode, link))
8743       if constants.DDM_ADD in nic_override:
8744         ip = nic_override[constants.DDM_ADD].get('ip', None)
8745         mac = nic_override[constants.DDM_ADD]['mac']
8746         nicparams = self.nic_pnew[constants.DDM_ADD]
8747         mode = nicparams[constants.NIC_MODE]
8748         link = nicparams[constants.NIC_LINK]
8749         args['nics'].append((ip, mac, mode, link))
8750       elif constants.DDM_REMOVE in nic_override:
8751         del args['nics'][-1]
8752
8753     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8754     if self.op.disk_template:
8755       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8756     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8757     return env, nl, nl
8758
8759   def CheckPrereq(self):
8760     """Check prerequisites.
8761
8762     This only checks the instance list against the existing names.
8763
8764     """
8765     # checking the new params on the primary/secondary nodes
8766
8767     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8768     cluster = self.cluster = self.cfg.GetClusterInfo()
8769     assert self.instance is not None, \
8770       "Cannot retrieve locked instance %s" % self.op.instance_name
8771     pnode = instance.primary_node
8772     nodelist = list(instance.all_nodes)
8773
8774     # OS change
8775     if self.op.os_name and not self.op.force:
8776       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8777                       self.op.force_variant)
8778       instance_os = self.op.os_name
8779     else:
8780       instance_os = instance.os
8781
8782     if self.op.disk_template:
8783       if instance.disk_template == self.op.disk_template:
8784         raise errors.OpPrereqError("Instance already has disk template %s" %
8785                                    instance.disk_template, errors.ECODE_INVAL)
8786
8787       if (instance.disk_template,
8788           self.op.disk_template) not in self._DISK_CONVERSIONS:
8789         raise errors.OpPrereqError("Unsupported disk template conversion from"
8790                                    " %s to %s" % (instance.disk_template,
8791                                                   self.op.disk_template),
8792                                    errors.ECODE_INVAL)
8793       _CheckInstanceDown(self, instance, "cannot change disk template")
8794       if self.op.disk_template in constants.DTS_NET_MIRROR:
8795         if self.op.remote_node == pnode:
8796           raise errors.OpPrereqError("Given new secondary node %s is the same"
8797                                      " as the primary node of the instance" %
8798                                      self.op.remote_node, errors.ECODE_STATE)
8799         _CheckNodeOnline(self, self.op.remote_node)
8800         _CheckNodeNotDrained(self, self.op.remote_node)
8801         disks = [{"size": d.size} for d in instance.disks]
8802         required = _ComputeDiskSize(self.op.disk_template, disks)
8803         _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8804
8805     # hvparams processing
8806     if self.op.hvparams:
8807       hv_type = instance.hypervisor
8808       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
8809       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
8810       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
8811
8812       # local check
8813       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
8814       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8815       self.hv_new = hv_new # the new actual values
8816       self.hv_inst = i_hvdict # the new dict (without defaults)
8817     else:
8818       self.hv_new = self.hv_inst = {}
8819
8820     # beparams processing
8821     if self.op.beparams:
8822       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
8823                                    use_none=True)
8824       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
8825       be_new = cluster.SimpleFillBE(i_bedict)
8826       self.be_new = be_new # the new actual values
8827       self.be_inst = i_bedict # the new dict (without defaults)
8828     else:
8829       self.be_new = self.be_inst = {}
8830
8831     # osparams processing
8832     if self.op.osparams:
8833       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
8834       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
8835       self.os_new = cluster.SimpleFillOS(instance_os, i_osdict)
8836       self.os_inst = i_osdict # the new dict (without defaults)
8837     else:
8838       self.os_new = self.os_inst = {}
8839
8840     self.warn = []
8841
8842     if constants.BE_MEMORY in self.op.beparams and not self.op.force:
8843       mem_check_list = [pnode]
8844       if be_new[constants.BE_AUTO_BALANCE]:
8845         # either we changed auto_balance to yes or it was from before
8846         mem_check_list.extend(instance.secondary_nodes)
8847       instance_info = self.rpc.call_instance_info(pnode, instance.name,
8848                                                   instance.hypervisor)
8849       nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8850                                          instance.hypervisor)
8851       pninfo = nodeinfo[pnode]
8852       msg = pninfo.fail_msg
8853       if msg:
8854         # Assume the primary node is unreachable and go ahead
8855         self.warn.append("Can't get info from primary node %s: %s" %
8856                          (pnode,  msg))
8857       elif not isinstance(pninfo.payload.get('memory_free', None), int):
8858         self.warn.append("Node data from primary node %s doesn't contain"
8859                          " free memory information" % pnode)
8860       elif instance_info.fail_msg:
8861         self.warn.append("Can't get instance runtime information: %s" %
8862                         instance_info.fail_msg)
8863       else:
8864         if instance_info.payload:
8865           current_mem = int(instance_info.payload['memory'])
8866         else:
8867           # Assume instance not running
8868           # (there is a slight race condition here, but it's not very probable,
8869           # and we have no other way to check)
8870           current_mem = 0
8871         miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8872                     pninfo.payload['memory_free'])
8873         if miss_mem > 0:
8874           raise errors.OpPrereqError("This change will prevent the instance"
8875                                      " from starting, due to %d MB of memory"
8876                                      " missing on its primary node" % miss_mem,
8877                                      errors.ECODE_NORES)
8878
8879       if be_new[constants.BE_AUTO_BALANCE]:
8880         for node, nres in nodeinfo.items():
8881           if node not in instance.secondary_nodes:
8882             continue
8883           msg = nres.fail_msg
8884           if msg:
8885             self.warn.append("Can't get info from secondary node %s: %s" %
8886                              (node, msg))
8887           elif not isinstance(nres.payload.get('memory_free', None), int):
8888             self.warn.append("Secondary node %s didn't return free"
8889                              " memory information" % node)
8890           elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8891             self.warn.append("Not enough memory to failover instance to"
8892                              " secondary node %s" % node)
8893
8894     # NIC processing
8895     self.nic_pnew = {}
8896     self.nic_pinst = {}
8897     for nic_op, nic_dict in self.op.nics:
8898       if nic_op == constants.DDM_REMOVE:
8899         if not instance.nics:
8900           raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8901                                      errors.ECODE_INVAL)
8902         continue
8903       if nic_op != constants.DDM_ADD:
8904         # an existing nic
8905         if not instance.nics:
8906           raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8907                                      " no NICs" % nic_op,
8908                                      errors.ECODE_INVAL)
8909         if nic_op < 0 or nic_op >= len(instance.nics):
8910           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8911                                      " are 0 to %d" %
8912                                      (nic_op, len(instance.nics) - 1),
8913                                      errors.ECODE_INVAL)
8914         old_nic_params = instance.nics[nic_op].nicparams
8915         old_nic_ip = instance.nics[nic_op].ip
8916       else:
8917         old_nic_params = {}
8918         old_nic_ip = None
8919
8920       update_params_dict = dict([(key, nic_dict[key])
8921                                  for key in constants.NICS_PARAMETERS
8922                                  if key in nic_dict])
8923
8924       if 'bridge' in nic_dict:
8925         update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8926
8927       new_nic_params = _GetUpdatedParams(old_nic_params,
8928                                          update_params_dict)
8929       utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
8930       new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
8931       objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8932       self.nic_pinst[nic_op] = new_nic_params
8933       self.nic_pnew[nic_op] = new_filled_nic_params
8934       new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8935
8936       if new_nic_mode == constants.NIC_MODE_BRIDGED:
8937         nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8938         msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8939         if msg:
8940           msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8941           if self.op.force:
8942             self.warn.append(msg)
8943           else:
8944             raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8945       if new_nic_mode == constants.NIC_MODE_ROUTED:
8946         if 'ip' in nic_dict:
8947           nic_ip = nic_dict['ip']
8948         else:
8949           nic_ip = old_nic_ip
8950         if nic_ip is None:
8951           raise errors.OpPrereqError('Cannot set the nic ip to None'
8952                                      ' on a routed nic', errors.ECODE_INVAL)
8953       if 'mac' in nic_dict:
8954         nic_mac = nic_dict['mac']
8955         if nic_mac is None:
8956           raise errors.OpPrereqError('Cannot set the nic mac to None',
8957                                      errors.ECODE_INVAL)
8958         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8959           # otherwise generate the mac
8960           nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8961         else:
8962           # or validate/reserve the current one
8963           try:
8964             self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8965           except errors.ReservationError:
8966             raise errors.OpPrereqError("MAC address %s already in use"
8967                                        " in cluster" % nic_mac,
8968                                        errors.ECODE_NOTUNIQUE)
8969
8970     # DISK processing
8971     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8972       raise errors.OpPrereqError("Disk operations not supported for"
8973                                  " diskless instances",
8974                                  errors.ECODE_INVAL)
8975     for disk_op, _ in self.op.disks:
8976       if disk_op == constants.DDM_REMOVE:
8977         if len(instance.disks) == 1:
8978           raise errors.OpPrereqError("Cannot remove the last disk of"
8979                                      " an instance", errors.ECODE_INVAL)
8980         _CheckInstanceDown(self, instance, "cannot remove disks")
8981
8982       if (disk_op == constants.DDM_ADD and
8983           len(instance.nics) >= constants.MAX_DISKS):
8984         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8985                                    " add more" % constants.MAX_DISKS,
8986                                    errors.ECODE_STATE)
8987       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8988         # an existing disk
8989         if disk_op < 0 or disk_op >= len(instance.disks):
8990           raise errors.OpPrereqError("Invalid disk index %s, valid values"
8991                                      " are 0 to %d" %
8992                                      (disk_op, len(instance.disks)),
8993                                      errors.ECODE_INVAL)
8994
8995     return
8996
8997   def _ConvertPlainToDrbd(self, feedback_fn):
8998     """Converts an instance from plain to drbd.
8999
9000     """
9001     feedback_fn("Converting template to drbd")
9002     instance = self.instance
9003     pnode = instance.primary_node
9004     snode = self.op.remote_node
9005
9006     # create a fake disk info for _GenerateDiskTemplate
9007     disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
9008     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
9009                                       instance.name, pnode, [snode],
9010                                       disk_info, None, None, 0)
9011     info = _GetInstanceInfoText(instance)
9012     feedback_fn("Creating aditional volumes...")
9013     # first, create the missing data and meta devices
9014     for disk in new_disks:
9015       # unfortunately this is... not too nice
9016       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9017                             info, True)
9018       for child in disk.children:
9019         _CreateSingleBlockDev(self, snode, instance, child, info, True)
9020     # at this stage, all new LVs have been created, we can rename the
9021     # old ones
9022     feedback_fn("Renaming original volumes...")
9023     rename_list = [(o, n.children[0].logical_id)
9024                    for (o, n) in zip(instance.disks, new_disks)]
9025     result = self.rpc.call_blockdev_rename(pnode, rename_list)
9026     result.Raise("Failed to rename original LVs")
9027
9028     feedback_fn("Initializing DRBD devices...")
9029     # all child devices are in place, we can now create the DRBD devices
9030     for disk in new_disks:
9031       for node in [pnode, snode]:
9032         f_create = node == pnode
9033         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9034
9035     # at this point, the instance has been modified
9036     instance.disk_template = constants.DT_DRBD8
9037     instance.disks = new_disks
9038     self.cfg.Update(instance, feedback_fn)
9039
9040     # disks are created, waiting for sync
9041     disk_abort = not _WaitForSync(self, instance)
9042     if disk_abort:
9043       raise errors.OpExecError("There are some degraded disks for"
9044                                " this instance, please cleanup manually")
9045
9046   def _ConvertDrbdToPlain(self, feedback_fn):
9047     """Converts an instance from drbd to plain.
9048
9049     """
9050     instance = self.instance
9051     assert len(instance.secondary_nodes) == 1
9052     pnode = instance.primary_node
9053     snode = instance.secondary_nodes[0]
9054     feedback_fn("Converting template to plain")
9055
9056     old_disks = instance.disks
9057     new_disks = [d.children[0] for d in old_disks]
9058
9059     # copy over size and mode
9060     for parent, child in zip(old_disks, new_disks):
9061       child.size = parent.size
9062       child.mode = parent.mode
9063
9064     # update instance structure
9065     instance.disks = new_disks
9066     instance.disk_template = constants.DT_PLAIN
9067     self.cfg.Update(instance, feedback_fn)
9068
9069     feedback_fn("Removing volumes on the secondary node...")
9070     for disk in old_disks:
9071       self.cfg.SetDiskID(disk, snode)
9072       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9073       if msg:
9074         self.LogWarning("Could not remove block device %s on node %s,"
9075                         " continuing anyway: %s", disk.iv_name, snode, msg)
9076
9077     feedback_fn("Removing unneeded volumes on the primary node...")
9078     for idx, disk in enumerate(old_disks):
9079       meta = disk.children[1]
9080       self.cfg.SetDiskID(meta, pnode)
9081       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9082       if msg:
9083         self.LogWarning("Could not remove metadata for disk %d on node %s,"
9084                         " continuing anyway: %s", idx, pnode, msg)
9085
9086
9087   def Exec(self, feedback_fn):
9088     """Modifies an instance.
9089
9090     All parameters take effect only at the next restart of the instance.
9091
9092     """
9093     # Process here the warnings from CheckPrereq, as we don't have a
9094     # feedback_fn there.
9095     for warn in self.warn:
9096       feedback_fn("WARNING: %s" % warn)
9097
9098     result = []
9099     instance = self.instance
9100     # disk changes
9101     for disk_op, disk_dict in self.op.disks:
9102       if disk_op == constants.DDM_REMOVE:
9103         # remove the last disk
9104         device = instance.disks.pop()
9105         device_idx = len(instance.disks)
9106         for node, disk in device.ComputeNodeTree(instance.primary_node):
9107           self.cfg.SetDiskID(disk, node)
9108           msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9109           if msg:
9110             self.LogWarning("Could not remove disk/%d on node %s: %s,"
9111                             " continuing anyway", device_idx, node, msg)
9112         result.append(("disk/%d" % device_idx, "remove"))
9113       elif disk_op == constants.DDM_ADD:
9114         # add a new disk
9115         if instance.disk_template == constants.DT_FILE:
9116           file_driver, file_path = instance.disks[0].logical_id
9117           file_path = os.path.dirname(file_path)
9118         else:
9119           file_driver = file_path = None
9120         disk_idx_base = len(instance.disks)
9121         new_disk = _GenerateDiskTemplate(self,
9122                                          instance.disk_template,
9123                                          instance.name, instance.primary_node,
9124                                          instance.secondary_nodes,
9125                                          [disk_dict],
9126                                          file_path,
9127                                          file_driver,
9128                                          disk_idx_base)[0]
9129         instance.disks.append(new_disk)
9130         info = _GetInstanceInfoText(instance)
9131
9132         logging.info("Creating volume %s for instance %s",
9133                      new_disk.iv_name, instance.name)
9134         # Note: this needs to be kept in sync with _CreateDisks
9135         #HARDCODE
9136         for node in instance.all_nodes:
9137           f_create = node == instance.primary_node
9138           try:
9139             _CreateBlockDev(self, node, instance, new_disk,
9140                             f_create, info, f_create)
9141           except errors.OpExecError, err:
9142             self.LogWarning("Failed to create volume %s (%s) on"
9143                             " node %s: %s",
9144                             new_disk.iv_name, new_disk, node, err)
9145         result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9146                        (new_disk.size, new_disk.mode)))
9147       else:
9148         # change a given disk
9149         instance.disks[disk_op].mode = disk_dict['mode']
9150         result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9151
9152     if self.op.disk_template:
9153       r_shut = _ShutdownInstanceDisks(self, instance)
9154       if not r_shut:
9155         raise errors.OpExecError("Cannot shutdow instance disks, unable to"
9156                                  " proceed with disk template conversion")
9157       mode = (instance.disk_template, self.op.disk_template)
9158       try:
9159         self._DISK_CONVERSIONS[mode](self, feedback_fn)
9160       except:
9161         self.cfg.ReleaseDRBDMinors(instance.name)
9162         raise
9163       result.append(("disk_template", self.op.disk_template))
9164
9165     # NIC changes
9166     for nic_op, nic_dict in self.op.nics:
9167       if nic_op == constants.DDM_REMOVE:
9168         # remove the last nic
9169         del instance.nics[-1]
9170         result.append(("nic.%d" % len(instance.nics), "remove"))
9171       elif nic_op == constants.DDM_ADD:
9172         # mac and bridge should be set, by now
9173         mac = nic_dict['mac']
9174         ip = nic_dict.get('ip', None)
9175         nicparams = self.nic_pinst[constants.DDM_ADD]
9176         new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9177         instance.nics.append(new_nic)
9178         result.append(("nic.%d" % (len(instance.nics) - 1),
9179                        "add:mac=%s,ip=%s,mode=%s,link=%s" %
9180                        (new_nic.mac, new_nic.ip,
9181                         self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9182                         self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9183                        )))
9184       else:
9185         for key in 'mac', 'ip':
9186           if key in nic_dict:
9187             setattr(instance.nics[nic_op], key, nic_dict[key])
9188         if nic_op in self.nic_pinst:
9189           instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9190         for key, val in nic_dict.iteritems():
9191           result.append(("nic.%s/%d" % (key, nic_op), val))
9192
9193     # hvparams changes
9194     if self.op.hvparams:
9195       instance.hvparams = self.hv_inst
9196       for key, val in self.op.hvparams.iteritems():
9197         result.append(("hv/%s" % key, val))
9198
9199     # beparams changes
9200     if self.op.beparams:
9201       instance.beparams = self.be_inst
9202       for key, val in self.op.beparams.iteritems():
9203         result.append(("be/%s" % key, val))
9204
9205     # OS change
9206     if self.op.os_name:
9207       instance.os = self.op.os_name
9208
9209     # osparams changes
9210     if self.op.osparams:
9211       instance.osparams = self.os_inst
9212       for key, val in self.op.osparams.iteritems():
9213         result.append(("os/%s" % key, val))
9214
9215     self.cfg.Update(instance, feedback_fn)
9216
9217     return result
9218
9219   _DISK_CONVERSIONS = {
9220     (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9221     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9222     }
9223
9224
9225 class LUQueryExports(NoHooksLU):
9226   """Query the exports list
9227
9228   """
9229   _OP_PARAMS = [
9230     ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
9231     ("use_locking", False, ht.TBool),
9232     ]
9233   REQ_BGL = False
9234
9235   def ExpandNames(self):
9236     self.needed_locks = {}
9237     self.share_locks[locking.LEVEL_NODE] = 1
9238     if not self.op.nodes:
9239       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9240     else:
9241       self.needed_locks[locking.LEVEL_NODE] = \
9242         _GetWantedNodes(self, self.op.nodes)
9243
9244   def Exec(self, feedback_fn):
9245     """Compute the list of all the exported system images.
9246
9247     @rtype: dict
9248     @return: a dictionary with the structure node->(export-list)
9249         where export-list is a list of the instances exported on
9250         that node.
9251
9252     """
9253     self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9254     rpcresult = self.rpc.call_export_list(self.nodes)
9255     result = {}
9256     for node in rpcresult:
9257       if rpcresult[node].fail_msg:
9258         result[node] = False
9259       else:
9260         result[node] = rpcresult[node].payload
9261
9262     return result
9263
9264
9265 class LUPrepareExport(NoHooksLU):
9266   """Prepares an instance for an export and returns useful information.
9267
9268   """
9269   _OP_PARAMS = [
9270     _PInstanceName,
9271     ("mode", ht.NoDefault, ht.TElemOf(constants.EXPORT_MODES)),
9272     ]
9273   REQ_BGL = False
9274
9275   def ExpandNames(self):
9276     self._ExpandAndLockInstance()
9277
9278   def CheckPrereq(self):
9279     """Check prerequisites.
9280
9281     """
9282     instance_name = self.op.instance_name
9283
9284     self.instance = self.cfg.GetInstanceInfo(instance_name)
9285     assert self.instance is not None, \
9286           "Cannot retrieve locked instance %s" % self.op.instance_name
9287     _CheckNodeOnline(self, self.instance.primary_node)
9288
9289     self._cds = _GetClusterDomainSecret()
9290
9291   def Exec(self, feedback_fn):
9292     """Prepares an instance for an export.
9293
9294     """
9295     instance = self.instance
9296
9297     if self.op.mode == constants.EXPORT_MODE_REMOTE:
9298       salt = utils.GenerateSecret(8)
9299
9300       feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9301       result = self.rpc.call_x509_cert_create(instance.primary_node,
9302                                               constants.RIE_CERT_VALIDITY)
9303       result.Raise("Can't create X509 key and certificate on %s" % result.node)
9304
9305       (name, cert_pem) = result.payload
9306
9307       cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9308                                              cert_pem)
9309
9310       return {
9311         "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9312         "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9313                           salt),
9314         "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9315         }
9316
9317     return None
9318
9319
9320 class LUExportInstance(LogicalUnit):
9321   """Export an instance to an image in the cluster.
9322
9323   """
9324   HPATH = "instance-export"
9325   HTYPE = constants.HTYPE_INSTANCE
9326   _OP_PARAMS = [
9327     _PInstanceName,
9328     ("target_node", ht.NoDefault, ht.TOr(ht.TNonEmptyString, ht.TList)),
9329     ("shutdown", True, ht.TBool),
9330     _PShutdownTimeout,
9331     ("remove_instance", False, ht.TBool),
9332     ("ignore_remove_failures", False, ht.TBool),
9333     ("mode", constants.EXPORT_MODE_LOCAL, ht.TElemOf(constants.EXPORT_MODES)),
9334     ("x509_key_name", None, ht.TOr(ht.TList, ht.TNone)),
9335     ("destination_x509_ca", None, ht.TMaybeString),
9336     ]
9337   REQ_BGL = False
9338
9339   def CheckArguments(self):
9340     """Check the arguments.
9341
9342     """
9343     self.x509_key_name = self.op.x509_key_name
9344     self.dest_x509_ca_pem = self.op.destination_x509_ca
9345
9346     if self.op.remove_instance and not self.op.shutdown:
9347       raise errors.OpPrereqError("Can not remove instance without shutting it"
9348                                  " down before")
9349
9350     if self.op.mode == constants.EXPORT_MODE_REMOTE:
9351       if not self.x509_key_name:
9352         raise errors.OpPrereqError("Missing X509 key name for encryption",
9353                                    errors.ECODE_INVAL)
9354
9355       if not self.dest_x509_ca_pem:
9356         raise errors.OpPrereqError("Missing destination X509 CA",
9357                                    errors.ECODE_INVAL)
9358
9359   def ExpandNames(self):
9360     self._ExpandAndLockInstance()
9361
9362     # Lock all nodes for local exports
9363     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9364       # FIXME: lock only instance primary and destination node
9365       #
9366       # Sad but true, for now we have do lock all nodes, as we don't know where
9367       # the previous export might be, and in this LU we search for it and
9368       # remove it from its current node. In the future we could fix this by:
9369       #  - making a tasklet to search (share-lock all), then create the
9370       #    new one, then one to remove, after
9371       #  - removing the removal operation altogether
9372       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9373
9374   def DeclareLocks(self, level):
9375     """Last minute lock declaration."""
9376     # All nodes are locked anyway, so nothing to do here.
9377
9378   def BuildHooksEnv(self):
9379     """Build hooks env.
9380
9381     This will run on the master, primary node and target node.
9382
9383     """
9384     env = {
9385       "EXPORT_MODE": self.op.mode,
9386       "EXPORT_NODE": self.op.target_node,
9387       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9388       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9389       # TODO: Generic function for boolean env variables
9390       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9391       }
9392
9393     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9394
9395     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9396
9397     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9398       nl.append(self.op.target_node)
9399
9400     return env, nl, nl
9401
9402   def CheckPrereq(self):
9403     """Check prerequisites.
9404
9405     This checks that the instance and node names are valid.
9406
9407     """
9408     instance_name = self.op.instance_name
9409
9410     self.instance = self.cfg.GetInstanceInfo(instance_name)
9411     assert self.instance is not None, \
9412           "Cannot retrieve locked instance %s" % self.op.instance_name
9413     _CheckNodeOnline(self, self.instance.primary_node)
9414
9415     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9416       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9417       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9418       assert self.dst_node is not None
9419
9420       _CheckNodeOnline(self, self.dst_node.name)
9421       _CheckNodeNotDrained(self, self.dst_node.name)
9422
9423       self._cds = None
9424       self.dest_disk_info = None
9425       self.dest_x509_ca = None
9426
9427     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9428       self.dst_node = None
9429
9430       if len(self.op.target_node) != len(self.instance.disks):
9431         raise errors.OpPrereqError(("Received destination information for %s"
9432                                     " disks, but instance %s has %s disks") %
9433                                    (len(self.op.target_node), instance_name,
9434                                     len(self.instance.disks)),
9435                                    errors.ECODE_INVAL)
9436
9437       cds = _GetClusterDomainSecret()
9438
9439       # Check X509 key name
9440       try:
9441         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9442       except (TypeError, ValueError), err:
9443         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9444
9445       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9446         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9447                                    errors.ECODE_INVAL)
9448
9449       # Load and verify CA
9450       try:
9451         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9452       except OpenSSL.crypto.Error, err:
9453         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9454                                    (err, ), errors.ECODE_INVAL)
9455
9456       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9457       if errcode is not None:
9458         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9459                                    (msg, ), errors.ECODE_INVAL)
9460
9461       self.dest_x509_ca = cert
9462
9463       # Verify target information
9464       disk_info = []
9465       for idx, disk_data in enumerate(self.op.target_node):
9466         try:
9467           (host, port, magic) = \
9468             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9469         except errors.GenericError, err:
9470           raise errors.OpPrereqError("Target info for disk %s: %s" %
9471                                      (idx, err), errors.ECODE_INVAL)
9472
9473         disk_info.append((host, port, magic))
9474
9475       assert len(disk_info) == len(self.op.target_node)
9476       self.dest_disk_info = disk_info
9477
9478     else:
9479       raise errors.ProgrammerError("Unhandled export mode %r" %
9480                                    self.op.mode)
9481
9482     # instance disk type verification
9483     # TODO: Implement export support for file-based disks
9484     for disk in self.instance.disks:
9485       if disk.dev_type == constants.LD_FILE:
9486         raise errors.OpPrereqError("Export not supported for instances with"
9487                                    " file-based disks", errors.ECODE_INVAL)
9488
9489   def _CleanupExports(self, feedback_fn):
9490     """Removes exports of current instance from all other nodes.
9491
9492     If an instance in a cluster with nodes A..D was exported to node C, its
9493     exports will be removed from the nodes A, B and D.
9494
9495     """
9496     assert self.op.mode != constants.EXPORT_MODE_REMOTE
9497
9498     nodelist = self.cfg.GetNodeList()
9499     nodelist.remove(self.dst_node.name)
9500
9501     # on one-node clusters nodelist will be empty after the removal
9502     # if we proceed the backup would be removed because OpQueryExports
9503     # substitutes an empty list with the full cluster node list.
9504     iname = self.instance.name
9505     if nodelist:
9506       feedback_fn("Removing old exports for instance %s" % iname)
9507       exportlist = self.rpc.call_export_list(nodelist)
9508       for node in exportlist:
9509         if exportlist[node].fail_msg:
9510           continue
9511         if iname in exportlist[node].payload:
9512           msg = self.rpc.call_export_remove(node, iname).fail_msg
9513           if msg:
9514             self.LogWarning("Could not remove older export for instance %s"
9515                             " on node %s: %s", iname, node, msg)
9516
9517   def Exec(self, feedback_fn):
9518     """Export an instance to an image in the cluster.
9519
9520     """
9521     assert self.op.mode in constants.EXPORT_MODES
9522
9523     instance = self.instance
9524     src_node = instance.primary_node
9525
9526     if self.op.shutdown:
9527       # shutdown the instance, but not the disks
9528       feedback_fn("Shutting down instance %s" % instance.name)
9529       result = self.rpc.call_instance_shutdown(src_node, instance,
9530                                                self.op.shutdown_timeout)
9531       # TODO: Maybe ignore failures if ignore_remove_failures is set
9532       result.Raise("Could not shutdown instance %s on"
9533                    " node %s" % (instance.name, src_node))
9534
9535     # set the disks ID correctly since call_instance_start needs the
9536     # correct drbd minor to create the symlinks
9537     for disk in instance.disks:
9538       self.cfg.SetDiskID(disk, src_node)
9539
9540     activate_disks = (not instance.admin_up)
9541
9542     if activate_disks:
9543       # Activate the instance disks if we'exporting a stopped instance
9544       feedback_fn("Activating disks for %s" % instance.name)
9545       _StartInstanceDisks(self, instance, None)
9546
9547     try:
9548       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9549                                                      instance)
9550
9551       helper.CreateSnapshots()
9552       try:
9553         if (self.op.shutdown and instance.admin_up and
9554             not self.op.remove_instance):
9555           assert not activate_disks
9556           feedback_fn("Starting instance %s" % instance.name)
9557           result = self.rpc.call_instance_start(src_node, instance, None, None)
9558           msg = result.fail_msg
9559           if msg:
9560             feedback_fn("Failed to start instance: %s" % msg)
9561             _ShutdownInstanceDisks(self, instance)
9562             raise errors.OpExecError("Could not start instance: %s" % msg)
9563
9564         if self.op.mode == constants.EXPORT_MODE_LOCAL:
9565           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9566         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9567           connect_timeout = constants.RIE_CONNECT_TIMEOUT
9568           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9569
9570           (key_name, _, _) = self.x509_key_name
9571
9572           dest_ca_pem = \
9573             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9574                                             self.dest_x509_ca)
9575
9576           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9577                                                      key_name, dest_ca_pem,
9578                                                      timeouts)
9579       finally:
9580         helper.Cleanup()
9581
9582       # Check for backwards compatibility
9583       assert len(dresults) == len(instance.disks)
9584       assert compat.all(isinstance(i, bool) for i in dresults), \
9585              "Not all results are boolean: %r" % dresults
9586
9587     finally:
9588       if activate_disks:
9589         feedback_fn("Deactivating disks for %s" % instance.name)
9590         _ShutdownInstanceDisks(self, instance)
9591
9592     if not (compat.all(dresults) and fin_resu):
9593       failures = []
9594       if not fin_resu:
9595         failures.append("export finalization")
9596       if not compat.all(dresults):
9597         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
9598                                if not dsk)
9599         failures.append("disk export: disk(s) %s" % fdsk)
9600
9601       raise errors.OpExecError("Export failed, errors in %s" %
9602                                utils.CommaJoin(failures))
9603
9604     # At this point, the export was successful, we can cleanup/finish
9605
9606     # Remove instance if requested
9607     if self.op.remove_instance:
9608       feedback_fn("Removing instance %s" % instance.name)
9609       _RemoveInstance(self, feedback_fn, instance,
9610                       self.op.ignore_remove_failures)
9611
9612     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9613       self._CleanupExports(feedback_fn)
9614
9615     return fin_resu, dresults
9616
9617
9618 class LURemoveExport(NoHooksLU):
9619   """Remove exports related to the named instance.
9620
9621   """
9622   _OP_PARAMS = [
9623     _PInstanceName,
9624     ]
9625   REQ_BGL = False
9626
9627   def ExpandNames(self):
9628     self.needed_locks = {}
9629     # We need all nodes to be locked in order for RemoveExport to work, but we
9630     # don't need to lock the instance itself, as nothing will happen to it (and
9631     # we can remove exports also for a removed instance)
9632     self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9633
9634   def Exec(self, feedback_fn):
9635     """Remove any export.
9636
9637     """
9638     instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9639     # If the instance was not found we'll try with the name that was passed in.
9640     # This will only work if it was an FQDN, though.
9641     fqdn_warn = False
9642     if not instance_name:
9643       fqdn_warn = True
9644       instance_name = self.op.instance_name
9645
9646     locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9647     exportlist = self.rpc.call_export_list(locked_nodes)
9648     found = False
9649     for node in exportlist:
9650       msg = exportlist[node].fail_msg
9651       if msg:
9652         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9653         continue
9654       if instance_name in exportlist[node].payload:
9655         found = True
9656         result = self.rpc.call_export_remove(node, instance_name)
9657         msg = result.fail_msg
9658         if msg:
9659           logging.error("Could not remove export for instance %s"
9660                         " on node %s: %s", instance_name, node, msg)
9661
9662     if fqdn_warn and not found:
9663       feedback_fn("Export not found. If trying to remove an export belonging"
9664                   " to a deleted instance please use its Fully Qualified"
9665                   " Domain Name.")
9666
9667
9668 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
9669   """Generic tags LU.
9670
9671   This is an abstract class which is the parent of all the other tags LUs.
9672
9673   """
9674
9675   def ExpandNames(self):
9676     self.needed_locks = {}
9677     if self.op.kind == constants.TAG_NODE:
9678       self.op.name = _ExpandNodeName(self.cfg, self.op.name)
9679       self.needed_locks[locking.LEVEL_NODE] = self.op.name
9680     elif self.op.kind == constants.TAG_INSTANCE:
9681       self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
9682       self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
9683
9684     # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
9685     # not possible to acquire the BGL based on opcode parameters)
9686
9687   def CheckPrereq(self):
9688     """Check prerequisites.
9689
9690     """
9691     if self.op.kind == constants.TAG_CLUSTER:
9692       self.target = self.cfg.GetClusterInfo()
9693     elif self.op.kind == constants.TAG_NODE:
9694       self.target = self.cfg.GetNodeInfo(self.op.name)
9695     elif self.op.kind == constants.TAG_INSTANCE:
9696       self.target = self.cfg.GetInstanceInfo(self.op.name)
9697     else:
9698       raise errors.OpPrereqError("Wrong tag type requested (%s)" %
9699                                  str(self.op.kind), errors.ECODE_INVAL)
9700
9701
9702 class LUGetTags(TagsLU):
9703   """Returns the tags of a given object.
9704
9705   """
9706   _OP_PARAMS = [
9707     ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
9708     # Name is only meaningful for nodes and instances
9709     ("name", ht.NoDefault, ht.TMaybeString),
9710     ]
9711   REQ_BGL = False
9712
9713   def ExpandNames(self):
9714     TagsLU.ExpandNames(self)
9715
9716     # Share locks as this is only a read operation
9717     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9718
9719   def Exec(self, feedback_fn):
9720     """Returns the tag list.
9721
9722     """
9723     return list(self.target.GetTags())
9724
9725
9726 class LUSearchTags(NoHooksLU):
9727   """Searches the tags for a given pattern.
9728
9729   """
9730   _OP_PARAMS = [
9731     ("pattern", ht.NoDefault, ht.TNonEmptyString),
9732     ]
9733   REQ_BGL = False
9734
9735   def ExpandNames(self):
9736     self.needed_locks = {}
9737
9738   def CheckPrereq(self):
9739     """Check prerequisites.
9740
9741     This checks the pattern passed for validity by compiling it.
9742
9743     """
9744     try:
9745       self.re = re.compile(self.op.pattern)
9746     except re.error, err:
9747       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9748                                  (self.op.pattern, err), errors.ECODE_INVAL)
9749
9750   def Exec(self, feedback_fn):
9751     """Returns the tag list.
9752
9753     """
9754     cfg = self.cfg
9755     tgts = [("/cluster", cfg.GetClusterInfo())]
9756     ilist = cfg.GetAllInstancesInfo().values()
9757     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9758     nlist = cfg.GetAllNodesInfo().values()
9759     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9760     results = []
9761     for path, target in tgts:
9762       for tag in target.GetTags():
9763         if self.re.search(tag):
9764           results.append((path, tag))
9765     return results
9766
9767
9768 class LUAddTags(TagsLU):
9769   """Sets a tag on a given object.
9770
9771   """
9772   _OP_PARAMS = [
9773     ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
9774     # Name is only meaningful for nodes and instances
9775     ("name", ht.NoDefault, ht.TMaybeString),
9776     ("tags", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
9777     ]
9778   REQ_BGL = False
9779
9780   def CheckPrereq(self):
9781     """Check prerequisites.
9782
9783     This checks the type and length of the tag name and value.
9784
9785     """
9786     TagsLU.CheckPrereq(self)
9787     for tag in self.op.tags:
9788       objects.TaggableObject.ValidateTag(tag)
9789
9790   def Exec(self, feedback_fn):
9791     """Sets the tag.
9792
9793     """
9794     try:
9795       for tag in self.op.tags:
9796         self.target.AddTag(tag)
9797     except errors.TagError, err:
9798       raise errors.OpExecError("Error while setting tag: %s" % str(err))
9799     self.cfg.Update(self.target, feedback_fn)
9800
9801
9802 class LUDelTags(TagsLU):
9803   """Delete a list of tags from a given object.
9804
9805   """
9806   _OP_PARAMS = [
9807     ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
9808     # Name is only meaningful for nodes and instances
9809     ("name", ht.NoDefault, ht.TMaybeString),
9810     ("tags", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
9811     ]
9812   REQ_BGL = False
9813
9814   def CheckPrereq(self):
9815     """Check prerequisites.
9816
9817     This checks that we have the given tag.
9818
9819     """
9820     TagsLU.CheckPrereq(self)
9821     for tag in self.op.tags:
9822       objects.TaggableObject.ValidateTag(tag)
9823     del_tags = frozenset(self.op.tags)
9824     cur_tags = self.target.GetTags()
9825
9826     diff_tags = del_tags - cur_tags
9827     if diff_tags:
9828       diff_names = ("'%s'" % i for i in sorted(diff_tags))
9829       raise errors.OpPrereqError("Tag(s) %s not found" %
9830                                  (utils.CommaJoin(diff_names), ),
9831                                  errors.ECODE_NOENT)
9832
9833   def Exec(self, feedback_fn):
9834     """Remove the tag from the object.
9835
9836     """
9837     for tag in self.op.tags:
9838       self.target.RemoveTag(tag)
9839     self.cfg.Update(self.target, feedback_fn)
9840
9841
9842 class LUTestDelay(NoHooksLU):
9843   """Sleep for a specified amount of time.
9844
9845   This LU sleeps on the master and/or nodes for a specified amount of
9846   time.
9847
9848   """
9849   _OP_PARAMS = [
9850     ("duration", ht.NoDefault, ht.TFloat),
9851     ("on_master", True, ht.TBool),
9852     ("on_nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
9853     ("repeat", 0, ht.TPositiveInt)
9854     ]
9855   REQ_BGL = False
9856
9857   def ExpandNames(self):
9858     """Expand names and set required locks.
9859
9860     This expands the node list, if any.
9861
9862     """
9863     self.needed_locks = {}
9864     if self.op.on_nodes:
9865       # _GetWantedNodes can be used here, but is not always appropriate to use
9866       # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
9867       # more information.
9868       self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
9869       self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
9870
9871   def _TestDelay(self):
9872     """Do the actual sleep.
9873
9874     """
9875     if self.op.on_master:
9876       if not utils.TestDelay(self.op.duration):
9877         raise errors.OpExecError("Error during master delay test")
9878     if self.op.on_nodes:
9879       result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
9880       for node, node_result in result.items():
9881         node_result.Raise("Failure during rpc call to node %s" % node)
9882
9883   def Exec(self, feedback_fn):
9884     """Execute the test delay opcode, with the wanted repetitions.
9885
9886     """
9887     if self.op.repeat == 0:
9888       self._TestDelay()
9889     else:
9890       top_value = self.op.repeat - 1
9891       for i in range(self.op.repeat):
9892         self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
9893         self._TestDelay()
9894
9895
9896 class LUTestJobqueue(NoHooksLU):
9897   """Utility LU to test some aspects of the job queue.
9898
9899   """
9900   _OP_PARAMS = [
9901     ("notify_waitlock", False, ht.TBool),
9902     ("notify_exec", False, ht.TBool),
9903     ("log_messages", ht.EmptyList, ht.TListOf(ht.TString)),
9904     ("fail", False, ht.TBool),
9905     ]
9906   REQ_BGL = False
9907
9908   # Must be lower than default timeout for WaitForJobChange to see whether it
9909   # notices changed jobs
9910   _CLIENT_CONNECT_TIMEOUT = 20.0
9911   _CLIENT_CONFIRM_TIMEOUT = 60.0
9912
9913   @classmethod
9914   def _NotifyUsingSocket(cls, cb, errcls):
9915     """Opens a Unix socket and waits for another program to connect.
9916
9917     @type cb: callable
9918     @param cb: Callback to send socket name to client
9919     @type errcls: class
9920     @param errcls: Exception class to use for errors
9921
9922     """
9923     # Using a temporary directory as there's no easy way to create temporary
9924     # sockets without writing a custom loop around tempfile.mktemp and
9925     # socket.bind
9926     tmpdir = tempfile.mkdtemp()
9927     try:
9928       tmpsock = utils.PathJoin(tmpdir, "sock")
9929
9930       logging.debug("Creating temporary socket at %s", tmpsock)
9931       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
9932       try:
9933         sock.bind(tmpsock)
9934         sock.listen(1)
9935
9936         # Send details to client
9937         cb(tmpsock)
9938
9939         # Wait for client to connect before continuing
9940         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
9941         try:
9942           (conn, _) = sock.accept()
9943         except socket.error, err:
9944           raise errcls("Client didn't connect in time (%s)" % err)
9945       finally:
9946         sock.close()
9947     finally:
9948       # Remove as soon as client is connected
9949       shutil.rmtree(tmpdir)
9950
9951     # Wait for client to close
9952     try:
9953       try:
9954         # pylint: disable-msg=E1101
9955         # Instance of '_socketobject' has no ... member
9956         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
9957         conn.recv(1)
9958       except socket.error, err:
9959         raise errcls("Client failed to confirm notification (%s)" % err)
9960     finally:
9961       conn.close()
9962
9963   def _SendNotification(self, test, arg, sockname):
9964     """Sends a notification to the client.
9965
9966     @type test: string
9967     @param test: Test name
9968     @param arg: Test argument (depends on test)
9969     @type sockname: string
9970     @param sockname: Socket path
9971
9972     """
9973     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
9974
9975   def _Notify(self, prereq, test, arg):
9976     """Notifies the client of a test.
9977
9978     @type prereq: bool
9979     @param prereq: Whether this is a prereq-phase test
9980     @type test: string
9981     @param test: Test name
9982     @param arg: Test argument (depends on test)
9983
9984     """
9985     if prereq:
9986       errcls = errors.OpPrereqError
9987     else:
9988       errcls = errors.OpExecError
9989
9990     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
9991                                                   test, arg),
9992                                    errcls)
9993
9994   def CheckArguments(self):
9995     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
9996     self.expandnames_calls = 0
9997
9998   def ExpandNames(self):
9999     checkargs_calls = getattr(self, "checkargs_calls", 0)
10000     if checkargs_calls < 1:
10001       raise errors.ProgrammerError("CheckArguments was not called")
10002
10003     self.expandnames_calls += 1
10004
10005     if self.op.notify_waitlock:
10006       self._Notify(True, constants.JQT_EXPANDNAMES, None)
10007
10008     self.LogInfo("Expanding names")
10009
10010     # Get lock on master node (just to get a lock, not for a particular reason)
10011     self.needed_locks = {
10012       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
10013       }
10014
10015   def Exec(self, feedback_fn):
10016     if self.expandnames_calls < 1:
10017       raise errors.ProgrammerError("ExpandNames was not called")
10018
10019     if self.op.notify_exec:
10020       self._Notify(False, constants.JQT_EXEC, None)
10021
10022     self.LogInfo("Executing")
10023
10024     if self.op.log_messages:
10025       self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
10026       for idx, msg in enumerate(self.op.log_messages):
10027         self.LogInfo("Sending log message %s", idx + 1)
10028         feedback_fn(constants.JQT_MSGPREFIX + msg)
10029         # Report how many test messages have been sent
10030         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
10031
10032     if self.op.fail:
10033       raise errors.OpExecError("Opcode failure was requested")
10034
10035     return True
10036
10037
10038 class IAllocator(object):
10039   """IAllocator framework.
10040
10041   An IAllocator instance has three sets of attributes:
10042     - cfg that is needed to query the cluster
10043     - input data (all members of the _KEYS class attribute are required)
10044     - four buffer attributes (in|out_data|text), that represent the
10045       input (to the external script) in text and data structure format,
10046       and the output from it, again in two formats
10047     - the result variables from the script (success, info, nodes) for
10048       easy usage
10049
10050   """
10051   # pylint: disable-msg=R0902
10052   # lots of instance attributes
10053   _ALLO_KEYS = [
10054     "name", "mem_size", "disks", "disk_template",
10055     "os", "tags", "nics", "vcpus", "hypervisor",
10056     ]
10057   _RELO_KEYS = [
10058     "name", "relocate_from",
10059     ]
10060   _EVAC_KEYS = [
10061     "evac_nodes",
10062     ]
10063
10064   def __init__(self, cfg, rpc, mode, **kwargs):
10065     self.cfg = cfg
10066     self.rpc = rpc
10067     # init buffer variables
10068     self.in_text = self.out_text = self.in_data = self.out_data = None
10069     # init all input fields so that pylint is happy
10070     self.mode = mode
10071     self.mem_size = self.disks = self.disk_template = None
10072     self.os = self.tags = self.nics = self.vcpus = None
10073     self.hypervisor = None
10074     self.relocate_from = None
10075     self.name = None
10076     self.evac_nodes = None
10077     # computed fields
10078     self.required_nodes = None
10079     # init result fields
10080     self.success = self.info = self.result = None
10081     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10082       keyset = self._ALLO_KEYS
10083       fn = self._AddNewInstance
10084     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10085       keyset = self._RELO_KEYS
10086       fn = self._AddRelocateInstance
10087     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10088       keyset = self._EVAC_KEYS
10089       fn = self._AddEvacuateNodes
10090     else:
10091       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
10092                                    " IAllocator" % self.mode)
10093     for key in kwargs:
10094       if key not in keyset:
10095         raise errors.ProgrammerError("Invalid input parameter '%s' to"
10096                                      " IAllocator" % key)
10097       setattr(self, key, kwargs[key])
10098
10099     for key in keyset:
10100       if key not in kwargs:
10101         raise errors.ProgrammerError("Missing input parameter '%s' to"
10102                                      " IAllocator" % key)
10103     self._BuildInputData(fn)
10104
10105   def _ComputeClusterData(self):
10106     """Compute the generic allocator input data.
10107
10108     This is the data that is independent of the actual operation.
10109
10110     """
10111     cfg = self.cfg
10112     cluster_info = cfg.GetClusterInfo()
10113     # cluster data
10114     data = {
10115       "version": constants.IALLOCATOR_VERSION,
10116       "cluster_name": cfg.GetClusterName(),
10117       "cluster_tags": list(cluster_info.GetTags()),
10118       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
10119       # we don't have job IDs
10120       }
10121     iinfo = cfg.GetAllInstancesInfo().values()
10122     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
10123
10124     # node data
10125     node_results = {}
10126     node_list = cfg.GetNodeList()
10127
10128     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10129       hypervisor_name = self.hypervisor
10130     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10131       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
10132     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10133       hypervisor_name = cluster_info.enabled_hypervisors[0]
10134
10135     node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
10136                                         hypervisor_name)
10137     node_iinfo = \
10138       self.rpc.call_all_instances_info(node_list,
10139                                        cluster_info.enabled_hypervisors)
10140     for nname, nresult in node_data.items():
10141       # first fill in static (config-based) values
10142       ninfo = cfg.GetNodeInfo(nname)
10143       pnr = {
10144         "tags": list(ninfo.GetTags()),
10145         "primary_ip": ninfo.primary_ip,
10146         "secondary_ip": ninfo.secondary_ip,
10147         "offline": ninfo.offline,
10148         "drained": ninfo.drained,
10149         "master_candidate": ninfo.master_candidate,
10150         }
10151
10152       if not (ninfo.offline or ninfo.drained):
10153         nresult.Raise("Can't get data for node %s" % nname)
10154         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
10155                                 nname)
10156         remote_info = nresult.payload
10157
10158         for attr in ['memory_total', 'memory_free', 'memory_dom0',
10159                      'vg_size', 'vg_free', 'cpu_total']:
10160           if attr not in remote_info:
10161             raise errors.OpExecError("Node '%s' didn't return attribute"
10162                                      " '%s'" % (nname, attr))
10163           if not isinstance(remote_info[attr], int):
10164             raise errors.OpExecError("Node '%s' returned invalid value"
10165                                      " for '%s': %s" %
10166                                      (nname, attr, remote_info[attr]))
10167         # compute memory used by primary instances
10168         i_p_mem = i_p_up_mem = 0
10169         for iinfo, beinfo in i_list:
10170           if iinfo.primary_node == nname:
10171             i_p_mem += beinfo[constants.BE_MEMORY]
10172             if iinfo.name not in node_iinfo[nname].payload:
10173               i_used_mem = 0
10174             else:
10175               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
10176             i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
10177             remote_info['memory_free'] -= max(0, i_mem_diff)
10178
10179             if iinfo.admin_up:
10180               i_p_up_mem += beinfo[constants.BE_MEMORY]
10181
10182         # compute memory used by instances
10183         pnr_dyn = {
10184           "total_memory": remote_info['memory_total'],
10185           "reserved_memory": remote_info['memory_dom0'],
10186           "free_memory": remote_info['memory_free'],
10187           "total_disk": remote_info['vg_size'],
10188           "free_disk": remote_info['vg_free'],
10189           "total_cpus": remote_info['cpu_total'],
10190           "i_pri_memory": i_p_mem,
10191           "i_pri_up_memory": i_p_up_mem,
10192           }
10193         pnr.update(pnr_dyn)
10194
10195       node_results[nname] = pnr
10196     data["nodes"] = node_results
10197
10198     # instance data
10199     instance_data = {}
10200     for iinfo, beinfo in i_list:
10201       nic_data = []
10202       for nic in iinfo.nics:
10203         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
10204         nic_dict = {"mac": nic.mac,
10205                     "ip": nic.ip,
10206                     "mode": filled_params[constants.NIC_MODE],
10207                     "link": filled_params[constants.NIC_LINK],
10208                    }
10209         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
10210           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
10211         nic_data.append(nic_dict)
10212       pir = {
10213         "tags": list(iinfo.GetTags()),
10214         "admin_up": iinfo.admin_up,
10215         "vcpus": beinfo[constants.BE_VCPUS],
10216         "memory": beinfo[constants.BE_MEMORY],
10217         "os": iinfo.os,
10218         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
10219         "nics": nic_data,
10220         "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
10221         "disk_template": iinfo.disk_template,
10222         "hypervisor": iinfo.hypervisor,
10223         }
10224       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
10225                                                  pir["disks"])
10226       instance_data[iinfo.name] = pir
10227
10228     data["instances"] = instance_data
10229
10230     self.in_data = data
10231
10232   def _AddNewInstance(self):
10233     """Add new instance data to allocator structure.
10234
10235     This in combination with _AllocatorGetClusterData will create the
10236     correct structure needed as input for the allocator.
10237
10238     The checks for the completeness of the opcode must have already been
10239     done.
10240
10241     """
10242     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
10243
10244     if self.disk_template in constants.DTS_NET_MIRROR:
10245       self.required_nodes = 2
10246     else:
10247       self.required_nodes = 1
10248     request = {
10249       "name": self.name,
10250       "disk_template": self.disk_template,
10251       "tags": self.tags,
10252       "os": self.os,
10253       "vcpus": self.vcpus,
10254       "memory": self.mem_size,
10255       "disks": self.disks,
10256       "disk_space_total": disk_space,
10257       "nics": self.nics,
10258       "required_nodes": self.required_nodes,
10259       }
10260     return request
10261
10262   def _AddRelocateInstance(self):
10263     """Add relocate instance data to allocator structure.
10264
10265     This in combination with _IAllocatorGetClusterData will create the
10266     correct structure needed as input for the allocator.
10267
10268     The checks for the completeness of the opcode must have already been
10269     done.
10270
10271     """
10272     instance = self.cfg.GetInstanceInfo(self.name)
10273     if instance is None:
10274       raise errors.ProgrammerError("Unknown instance '%s' passed to"
10275                                    " IAllocator" % self.name)
10276
10277     if instance.disk_template not in constants.DTS_NET_MIRROR:
10278       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
10279                                  errors.ECODE_INVAL)
10280
10281     if len(instance.secondary_nodes) != 1:
10282       raise errors.OpPrereqError("Instance has not exactly one secondary node",
10283                                  errors.ECODE_STATE)
10284
10285     self.required_nodes = 1
10286     disk_sizes = [{'size': disk.size} for disk in instance.disks]
10287     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
10288
10289     request = {
10290       "name": self.name,
10291       "disk_space_total": disk_space,
10292       "required_nodes": self.required_nodes,
10293       "relocate_from": self.relocate_from,
10294       }
10295     return request
10296
10297   def _AddEvacuateNodes(self):
10298     """Add evacuate nodes data to allocator structure.
10299
10300     """
10301     request = {
10302       "evac_nodes": self.evac_nodes
10303       }
10304     return request
10305
10306   def _BuildInputData(self, fn):
10307     """Build input data structures.
10308
10309     """
10310     self._ComputeClusterData()
10311
10312     request = fn()
10313     request["type"] = self.mode
10314     self.in_data["request"] = request
10315
10316     self.in_text = serializer.Dump(self.in_data)
10317
10318   def Run(self, name, validate=True, call_fn=None):
10319     """Run an instance allocator and return the results.
10320
10321     """
10322     if call_fn is None:
10323       call_fn = self.rpc.call_iallocator_runner
10324
10325     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
10326     result.Raise("Failure while running the iallocator script")
10327
10328     self.out_text = result.payload
10329     if validate:
10330       self._ValidateResult()
10331
10332   def _ValidateResult(self):
10333     """Process the allocator results.
10334
10335     This will process and if successful save the result in
10336     self.out_data and the other parameters.
10337
10338     """
10339     try:
10340       rdict = serializer.Load(self.out_text)
10341     except Exception, err:
10342       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
10343
10344     if not isinstance(rdict, dict):
10345       raise errors.OpExecError("Can't parse iallocator results: not a dict")
10346
10347     # TODO: remove backwards compatiblity in later versions
10348     if "nodes" in rdict and "result" not in rdict:
10349       rdict["result"] = rdict["nodes"]
10350       del rdict["nodes"]
10351
10352     for key in "success", "info", "result":
10353       if key not in rdict:
10354         raise errors.OpExecError("Can't parse iallocator results:"
10355                                  " missing key '%s'" % key)
10356       setattr(self, key, rdict[key])
10357
10358     if not isinstance(rdict["result"], list):
10359       raise errors.OpExecError("Can't parse iallocator results: 'result' key"
10360                                " is not a list")
10361     self.out_data = rdict
10362
10363
10364 class LUTestAllocator(NoHooksLU):
10365   """Run allocator tests.
10366
10367   This LU runs the allocator tests
10368
10369   """
10370   _OP_PARAMS = [
10371     ("direction", ht.NoDefault,
10372      ht.TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
10373     ("mode", ht.NoDefault, ht.TElemOf(constants.VALID_IALLOCATOR_MODES)),
10374     ("name", ht.NoDefault, ht.TNonEmptyString),
10375     ("nics", ht.NoDefault, ht.TOr(ht.TNone, ht.TListOf(
10376       ht.TDictOf(ht.TElemOf(["mac", "ip", "bridge"]),
10377                ht.TOr(ht.TNone, ht.TNonEmptyString))))),
10378     ("disks", ht.NoDefault, ht.TOr(ht.TNone, ht.TList)),
10379     ("hypervisor", None, ht.TMaybeString),
10380     ("allocator", None, ht.TMaybeString),
10381     ("tags", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
10382     ("mem_size", None, ht.TOr(ht.TNone, ht.TPositiveInt)),
10383     ("vcpus", None, ht.TOr(ht.TNone, ht.TPositiveInt)),
10384     ("os", None, ht.TMaybeString),
10385     ("disk_template", None, ht.TMaybeString),
10386     ("evac_nodes", None, ht.TOr(ht.TNone, ht.TListOf(ht.TNonEmptyString))),
10387     ]
10388
10389   def CheckPrereq(self):
10390     """Check prerequisites.
10391
10392     This checks the opcode parameters depending on the director and mode test.
10393
10394     """
10395     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10396       for attr in ["mem_size", "disks", "disk_template",
10397                    "os", "tags", "nics", "vcpus"]:
10398         if not hasattr(self.op, attr):
10399           raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
10400                                      attr, errors.ECODE_INVAL)
10401       iname = self.cfg.ExpandInstanceName(self.op.name)
10402       if iname is not None:
10403         raise errors.OpPrereqError("Instance '%s' already in the cluster" %
10404                                    iname, errors.ECODE_EXISTS)
10405       if not isinstance(self.op.nics, list):
10406         raise errors.OpPrereqError("Invalid parameter 'nics'",
10407                                    errors.ECODE_INVAL)
10408       if not isinstance(self.op.disks, list):
10409         raise errors.OpPrereqError("Invalid parameter 'disks'",
10410                                    errors.ECODE_INVAL)
10411       for row in self.op.disks:
10412         if (not isinstance(row, dict) or
10413             "size" not in row or
10414             not isinstance(row["size"], int) or
10415             "mode" not in row or
10416             row["mode"] not in ['r', 'w']):
10417           raise errors.OpPrereqError("Invalid contents of the 'disks'"
10418                                      " parameter", errors.ECODE_INVAL)
10419       if self.op.hypervisor is None:
10420         self.op.hypervisor = self.cfg.GetHypervisorType()
10421     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10422       fname = _ExpandInstanceName(self.cfg, self.op.name)
10423       self.op.name = fname
10424       self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
10425     elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10426       if not hasattr(self.op, "evac_nodes"):
10427         raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
10428                                    " opcode input", errors.ECODE_INVAL)
10429     else:
10430       raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
10431                                  self.op.mode, errors.ECODE_INVAL)
10432
10433     if self.op.direction == constants.IALLOCATOR_DIR_OUT:
10434       if self.op.allocator is None:
10435         raise errors.OpPrereqError("Missing allocator name",
10436                                    errors.ECODE_INVAL)
10437     elif self.op.direction != constants.IALLOCATOR_DIR_IN:
10438       raise errors.OpPrereqError("Wrong allocator test '%s'" %
10439                                  self.op.direction, errors.ECODE_INVAL)
10440
10441   def Exec(self, feedback_fn):
10442     """Run the allocator test.
10443
10444     """
10445     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10446       ial = IAllocator(self.cfg, self.rpc,
10447                        mode=self.op.mode,
10448                        name=self.op.name,
10449                        mem_size=self.op.mem_size,
10450                        disks=self.op.disks,
10451                        disk_template=self.op.disk_template,
10452                        os=self.op.os,
10453                        tags=self.op.tags,
10454                        nics=self.op.nics,
10455                        vcpus=self.op.vcpus,
10456                        hypervisor=self.op.hypervisor,
10457                        )
10458     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10459       ial = IAllocator(self.cfg, self.rpc,
10460                        mode=self.op.mode,
10461                        name=self.op.name,
10462                        relocate_from=list(self.relocate_from),
10463                        )
10464     elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10465       ial = IAllocator(self.cfg, self.rpc,
10466                        mode=self.op.mode,
10467                        evac_nodes=self.op.evac_nodes)
10468     else:
10469       raise errors.ProgrammerError("Uncatched mode %s in"
10470                                    " LUTestAllocator.Exec", self.op.mode)
10471
10472     if self.op.direction == constants.IALLOCATOR_DIR_IN:
10473       result = ial.in_text
10474     else:
10475       ial.Run(self.op.allocator, validate=False)
10476       result = ial.out_text
10477     return result