code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable-msg=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay to many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import platform
  36 import logging
  37 import copy
  38 import OpenSSL
  39 import socket
  40 import tempfile
  41 import shutil
  42
  43 from ganeti import ssh
  44 from ganeti import utils
  45 from ganeti import errors
  46 from ganeti import hypervisor
  47 from ganeti import locking
  48 from ganeti import constants
  49 from ganeti import objects
  50 from ganeti import serializer
  51 from ganeti import ssconf
  52 from ganeti import uidpool
  53 from ganeti import compat
  54 from ganeti import masterd
  55 from ganeti import netutils
  56 from ganeti import ht
  57
  58 import ganeti.masterd.instance # pylint: disable-msg=W0611
  59
  60 # Common opcode attributes
  61
  62 #: output fields for a query operation
  63 _POutputFields = ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString))
  64
  65
  66 #: the shutdown timeout
  67 _PShutdownTimeout = ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT,
  68                      ht.TPositiveInt)
  69
  70 #: the force parameter
  71 _PForce = ("force", False, ht.TBool)
  72
  73 #: a required instance name (for single-instance LUs)
  74 _PInstanceName = ("instance_name", ht.NoDefault, ht.TNonEmptyString)
  75
  76 #: Whether to ignore offline nodes
  77 _PIgnoreOfflineNodes = ("ignore_offline_nodes", False, ht.TBool)
  78
  79 #: a required node name (for single-node LUs)
  80 _PNodeName = ("node_name", ht.NoDefault, ht.TNonEmptyString)
  81
  82 #: the migration type (live/non-live)
  83 _PMigrationMode = ("mode", None,
  84                    ht.TOr(ht.TNone, ht.TElemOf(constants.HT_MIGRATION_MODES)))
  85
  86 #: the obsolete 'live' mode (boolean)
  87 _PMigrationLive = ("live", None, ht.TMaybeBool)
  88
  89
  90 # End types
  91 class LogicalUnit(object):
  92   """Logical Unit base class.
  93
  94   Subclasses must follow these rules:
  95     - implement ExpandNames
  96     - implement CheckPrereq (except when tasklets are used)
  97     - implement Exec (except when tasklets are used)
  98     - implement BuildHooksEnv
  99     - redefine HPATH and HTYPE
 100     - optionally redefine their run requirements:
 101         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
 102
 103   Note that all commands require root permissions.
 104
 105   @ivar dry_run_result: the value (if any) that will be returned to the caller
 106       in dry-run mode (signalled by opcode dry_run parameter)
 107   @cvar _OP_PARAMS: a list of opcode attributes, their defaults values
 108       they should get if not already defined, and types they must match
 109
 110   """
 111   HPATH = None
 112   HTYPE = None
 113   _OP_PARAMS = []
 114   REQ_BGL = True
 115
 116   def __init__(self, processor, op, context, rpc):
 117     """Constructor for LogicalUnit.
 118
 119     This needs to be overridden in derived classes in order to check op
 120     validity.
 121
 122     """
 123     self.proc = processor
 124     self.op = op
 125     self.cfg = context.cfg
 126     self.context = context
 127     self.rpc = rpc
 128     # Dicts used to declare locking needs to mcpu
 129     self.needed_locks = None
 130     self.acquired_locks = {}
 131     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 132     self.add_locks = {}
 133     self.remove_locks = {}
 134     # Used to force good behavior when calling helper functions
 135     self.recalculate_locks = {}
 136     self.__ssh = None
 137     # logging
 138     self.Log = processor.Log # pylint: disable-msg=C0103
 139     self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
 140     self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
 141     self.LogStep = processor.LogStep # pylint: disable-msg=C0103
 142     # support for dry-run
 143     self.dry_run_result = None
 144     # support for generic debug attribute
 145     if (not hasattr(self.op, "debug_level") or
 146         not isinstance(self.op.debug_level, int)):
 147       self.op.debug_level = 0
 148
 149     # Tasklets
 150     self.tasklets = None
 151
 152     # The new kind-of-type-system
 153     op_id = self.op.OP_ID
 154     for attr_name, aval, test in self._OP_PARAMS:
 155       if not hasattr(op, attr_name):
 156         if aval == ht.NoDefault:
 157           raise errors.OpPrereqError("Required parameter '%s.%s' missing" %
 158                                      (op_id, attr_name), errors.ECODE_INVAL)
 159         else:
 160           if callable(aval):
 161             dval = aval()
 162           else:
 163             dval = aval
 164           setattr(self.op, attr_name, dval)
 165       attr_val = getattr(op, attr_name)
 166       if test == ht.NoType:
 167         # no tests here
 168         continue
 169       if not callable(test):
 170         raise errors.ProgrammerError("Validation for parameter '%s.%s' failed,"
 171                                      " given type is not a proper type (%s)" %
 172                                      (op_id, attr_name, test))
 173       if not test(attr_val):
 174         logging.error("OpCode %s, parameter %s, has invalid type %s/value %s",
 175                       self.op.OP_ID, attr_name, type(attr_val), attr_val)
 176         raise errors.OpPrereqError("Parameter '%s.%s' fails validation" %
 177                                    (op_id, attr_name), errors.ECODE_INVAL)
 178
 179     self.CheckArguments()
 180
 181   def __GetSSH(self):
 182     """Returns the SshRunner object
 183
 184     """
 185     if not self.__ssh:
 186       self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
 187     return self.__ssh
 188
 189   ssh = property(fget=__GetSSH)
 190
 191   def CheckArguments(self):
 192     """Check syntactic validity for the opcode arguments.
 193
 194     This method is for doing a simple syntactic check and ensure
 195     validity of opcode parameters, without any cluster-related
 196     checks. While the same can be accomplished in ExpandNames and/or
 197     CheckPrereq, doing these separate is better because:
 198
 199       - ExpandNames is left as as purely a lock-related function
 200       - CheckPrereq is run after we have acquired locks (and possible
 201         waited for them)
 202
 203     The function is allowed to change the self.op attribute so that
 204     later methods can no longer worry about missing parameters.
 205
 206     """
 207     pass
 208
 209   def ExpandNames(self):
 210     """Expand names for this LU.
 211
 212     This method is called before starting to execute the opcode, and it should
 213     update all the parameters of the opcode to their canonical form (e.g. a
 214     short node name must be fully expanded after this method has successfully
 215     completed). This way locking, hooks, logging, ecc. can work correctly.
 216
 217     LUs which implement this method must also populate the self.needed_locks
 218     member, as a dict with lock levels as keys, and a list of needed lock names
 219     as values. Rules:
 220
 221       - use an empty dict if you don't need any lock
 222       - if you don't need any lock at a particular level omit that level
 223       - don't put anything for the BGL level
 224       - if you want all locks at a level use locking.ALL_SET as a value
 225
 226     If you need to share locks (rather than acquire them exclusively) at one
 227     level you can modify self.share_locks, setting a true value (usually 1) for
 228     that level. By default locks are not shared.
 229
 230     This function can also define a list of tasklets, which then will be
 231     executed in order instead of the usual LU-level CheckPrereq and Exec
 232     functions, if those are not defined by the LU.
 233
 234     Examples::
 235
 236       # Acquire all nodes and one instance
 237       self.needed_locks = {
 238         locking.LEVEL_NODE: locking.ALL_SET,
 239         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 240       }
 241       # Acquire just two nodes
 242       self.needed_locks = {
 243         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 244       }
 245       # Acquire no locks
 246       self.needed_locks = {} # No, you can't leave it to the default value None
 247
 248     """
 249     # The implementation of this method is mandatory only if the new LU is
 250     # concurrent, so that old LUs don't need to be changed all at the same
 251     # time.
 252     if self.REQ_BGL:
 253       self.needed_locks = {} # Exclusive LUs don't need locks.
 254     else:
 255       raise NotImplementedError
 256
 257   def DeclareLocks(self, level):
 258     """Declare LU locking needs for a level
 259
 260     While most LUs can just declare their locking needs at ExpandNames time,
 261     sometimes there's the need to calculate some locks after having acquired
 262     the ones before. This function is called just before acquiring locks at a
 263     particular level, but after acquiring the ones at lower levels, and permits
 264     such calculations. It can be used to modify self.needed_locks, and by
 265     default it does nothing.
 266
 267     This function is only called if you have something already set in
 268     self.needed_locks for the level.
 269
 270     @param level: Locking level which is going to be locked
 271     @type level: member of ganeti.locking.LEVELS
 272
 273     """
 274
 275   def CheckPrereq(self):
 276     """Check prerequisites for this LU.
 277
 278     This method should check that the prerequisites for the execution
 279     of this LU are fulfilled. It can do internode communication, but
 280     it should be idempotent - no cluster or system changes are
 281     allowed.
 282
 283     The method should raise errors.OpPrereqError in case something is
 284     not fulfilled. Its return value is ignored.
 285
 286     This method should also update all the parameters of the opcode to
 287     their canonical form if it hasn't been done by ExpandNames before.
 288
 289     """
 290     if self.tasklets is not None:
 291       for (idx, tl) in enumerate(self.tasklets):
 292         logging.debug("Checking prerequisites for tasklet %s/%s",
 293                       idx + 1, len(self.tasklets))
 294         tl.CheckPrereq()
 295     else:
 296       pass
 297
 298   def Exec(self, feedback_fn):
 299     """Execute the LU.
 300
 301     This method should implement the actual work. It should raise
 302     errors.OpExecError for failures that are somewhat dealt with in
 303     code, or expected.
 304
 305     """
 306     if self.tasklets is not None:
 307       for (idx, tl) in enumerate(self.tasklets):
 308         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 309         tl.Exec(feedback_fn)
 310     else:
 311       raise NotImplementedError
 312
 313   def BuildHooksEnv(self):
 314     """Build hooks environment for this LU.
 315
 316     This method should return a three-node tuple consisting of: a dict
 317     containing the environment that will be used for running the
 318     specific hook for this LU, a list of node names on which the hook
 319     should run before the execution, and a list of node names on which
 320     the hook should run after the execution.
 321
 322     The keys of the dict must not have 'GANETI_' prefixed as this will
 323     be handled in the hooks runner. Also note additional keys will be
 324     added by the hooks runner. If the LU doesn't define any
 325     environment, an empty dict (and not None) should be returned.
 326
 327     No nodes should be returned as an empty list (and not None).
 328
 329     Note that if the HPATH for a LU class is None, this function will
 330     not be called.
 331
 332     """
 333     raise NotImplementedError
 334
 335   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 336     """Notify the LU about the results of its hooks.
 337
 338     This method is called every time a hooks phase is executed, and notifies
 339     the Logical Unit about the hooks' result. The LU can then use it to alter
 340     its result based on the hooks.  By default the method does nothing and the
 341     previous result is passed back unchanged but any LU can define it if it
 342     wants to use the local cluster hook-scripts somehow.
 343
 344     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 345         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 346     @param hook_results: the results of the multi-node hooks rpc call
 347     @param feedback_fn: function used send feedback back to the caller
 348     @param lu_result: the previous Exec result this LU had, or None
 349         in the PRE phase
 350     @return: the new Exec result, based on the previous result
 351         and hook results
 352
 353     """
 354     # API must be kept, thus we ignore the unused argument and could
 355     # be a function warnings
 356     # pylint: disable-msg=W0613,R0201
 357     return lu_result
 358
 359   def _ExpandAndLockInstance(self):
 360     """Helper function to expand and lock an instance.
 361
 362     Many LUs that work on an instance take its name in self.op.instance_name
 363     and need to expand it and then declare the expanded name for locking. This
 364     function does it, and then updates self.op.instance_name to the expanded
 365     name. It also initializes needed_locks as a dict, if this hasn't been done
 366     before.
 367
 368     """
 369     if self.needed_locks is None:
 370       self.needed_locks = {}
 371     else:
 372       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 373         "_ExpandAndLockInstance called with instance-level locks set"
 374     self.op.instance_name = _ExpandInstanceName(self.cfg,
 375                                                 self.op.instance_name)
 376     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 377
 378   def _LockInstancesNodes(self, primary_only=False):
 379     """Helper function to declare instances' nodes for locking.
 380
 381     This function should be called after locking one or more instances to lock
 382     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 383     with all primary or secondary nodes for instances already locked and
 384     present in self.needed_locks[locking.LEVEL_INSTANCE].
 385
 386     It should be called from DeclareLocks, and for safety only works if
 387     self.recalculate_locks[locking.LEVEL_NODE] is set.
 388
 389     In the future it may grow parameters to just lock some instance's nodes, or
 390     to just lock primaries or secondary nodes, if needed.
 391
 392     If should be called in DeclareLocks in a way similar to::
 393
 394       if level == locking.LEVEL_NODE:
 395         self._LockInstancesNodes()
 396
 397     @type primary_only: boolean
 398     @param primary_only: only lock primary nodes of locked instances
 399
 400     """
 401     assert locking.LEVEL_NODE in self.recalculate_locks, \
 402       "_LockInstancesNodes helper function called with no nodes to recalculate"
 403
 404     # TODO: check if we're really been called with the instance locks held
 405
 406     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 407     # future we might want to have different behaviors depending on the value
 408     # of self.recalculate_locks[locking.LEVEL_NODE]
 409     wanted_nodes = []
 410     for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
 411       instance = self.context.cfg.GetInstanceInfo(instance_name)
 412       wanted_nodes.append(instance.primary_node)
 413       if not primary_only:
 414         wanted_nodes.extend(instance.secondary_nodes)
 415
 416     if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
 417       self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
 418     elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
 419       self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
 420
 421     del self.recalculate_locks[locking.LEVEL_NODE]
 422
 423
 424 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
 425   """Simple LU which runs no hooks.
 426
 427   This LU is intended as a parent for other LogicalUnits which will
 428   run no hooks, in order to reduce duplicate code.
 429
 430   """
 431   HPATH = None
 432   HTYPE = None
 433
 434   def BuildHooksEnv(self):
 435     """Empty BuildHooksEnv for NoHooksLu.
 436
 437     This just raises an error.
 438
 439     """
 440     assert False, "BuildHooksEnv called for NoHooksLUs"
 441
 442
 443 class Tasklet:
 444   """Tasklet base class.
 445
 446   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 447   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 448   tasklets know nothing about locks.
 449
 450   Subclasses must follow these rules:
 451     - Implement CheckPrereq
 452     - Implement Exec
 453
 454   """
 455   def __init__(self, lu):
 456     self.lu = lu
 457
 458     # Shortcuts
 459     self.cfg = lu.cfg
 460     self.rpc = lu.rpc
 461
 462   def CheckPrereq(self):
 463     """Check prerequisites for this tasklets.
 464
 465     This method should check whether the prerequisites for the execution of
 466     this tasklet are fulfilled. It can do internode communication, but it
 467     should be idempotent - no cluster or system changes are allowed.
 468
 469     The method should raise errors.OpPrereqError in case something is not
 470     fulfilled. Its return value is ignored.
 471
 472     This method should also update all parameters to their canonical form if it
 473     hasn't been done before.
 474
 475     """
 476     pass
 477
 478   def Exec(self, feedback_fn):
 479     """Execute the tasklet.
 480
 481     This method should implement the actual work. It should raise
 482     errors.OpExecError for failures that are somewhat dealt with in code, or
 483     expected.
 484
 485     """
 486     raise NotImplementedError
 487
 488
 489 def _GetWantedNodes(lu, nodes):
 490   """Returns list of checked and expanded node names.
 491
 492   @type lu: L{LogicalUnit}
 493   @param lu: the logical unit on whose behalf we execute
 494   @type nodes: list
 495   @param nodes: list of node names or None for all nodes
 496   @rtype: list
 497   @return: the list of nodes, sorted
 498   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 499
 500   """
 501   if not nodes:
 502     raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
 503       " non-empty list of nodes whose name is to be expanded.")
 504
 505   wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
 506   return utils.NiceSort(wanted)
 507
 508
 509 def _GetWantedInstances(lu, instances):
 510   """Returns list of checked and expanded instance names.
 511
 512   @type lu: L{LogicalUnit}
 513   @param lu: the logical unit on whose behalf we execute
 514   @type instances: list
 515   @param instances: list of instance names or None for all instances
 516   @rtype: list
 517   @return: the list of instances, sorted
 518   @raise errors.OpPrereqError: if the instances parameter is wrong type
 519   @raise errors.OpPrereqError: if any of the passed instances is not found
 520
 521   """
 522   if instances:
 523     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 524   else:
 525     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 526   return wanted
 527
 528
 529 def _GetUpdatedParams(old_params, update_dict,
 530                       use_default=True, use_none=False):
 531   """Return the new version of a parameter dictionary.
 532
 533   @type old_params: dict
 534   @param old_params: old parameters
 535   @type update_dict: dict
 536   @param update_dict: dict containing new parameter values, or
 537       constants.VALUE_DEFAULT to reset the parameter to its default
 538       value
 539   @param use_default: boolean
 540   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 541       values as 'to be deleted' values
 542   @param use_none: boolean
 543   @type use_none: whether to recognise C{None} values as 'to be
 544       deleted' values
 545   @rtype: dict
 546   @return: the new parameter dictionary
 547
 548   """
 549   params_copy = copy.deepcopy(old_params)
 550   for key, val in update_dict.iteritems():
 551     if ((use_default and val == constants.VALUE_DEFAULT) or
 552         (use_none and val is None)):
 553       try:
 554         del params_copy[key]
 555       except KeyError:
 556         pass
 557     else:
 558       params_copy[key] = val
 559   return params_copy
 560
 561
 562 def _CheckOutputFields(static, dynamic, selected):
 563   """Checks whether all selected fields are valid.
 564
 565   @type static: L{utils.FieldSet}
 566   @param static: static fields set
 567   @type dynamic: L{utils.FieldSet}
 568   @param dynamic: dynamic fields set
 569
 570   """
 571   f = utils.FieldSet()
 572   f.Extend(static)
 573   f.Extend(dynamic)
 574
 575   delta = f.NonMatching(selected)
 576   if delta:
 577     raise errors.OpPrereqError("Unknown output fields selected: %s"
 578                                % ",".join(delta), errors.ECODE_INVAL)
 579
 580
 581 def _CheckGlobalHvParams(params):
 582   """Validates that given hypervisor params are not global ones.
 583
 584   This will ensure that instances don't get customised versions of
 585   global params.
 586
 587   """
 588   used_globals = constants.HVC_GLOBALS.intersection(params)
 589   if used_globals:
 590     msg = ("The following hypervisor parameters are global and cannot"
 591            " be customized at instance level, please modify them at"
 592            " cluster level: %s" % utils.CommaJoin(used_globals))
 593     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 594
 595
 596 def _CheckNodeOnline(lu, node):
 597   """Ensure that a given node is online.
 598
 599   @param lu: the LU on behalf of which we make the check
 600   @param node: the node to check
 601   @raise errors.OpPrereqError: if the node is offline
 602
 603   """
 604   if lu.cfg.GetNodeInfo(node).offline:
 605     raise errors.OpPrereqError("Can't use offline node %s" % node,
 606                                errors.ECODE_INVAL)
 607
 608
 609 def _CheckNodeNotDrained(lu, node):
 610   """Ensure that a given node is not drained.
 611
 612   @param lu: the LU on behalf of which we make the check
 613   @param node: the node to check
 614   @raise errors.OpPrereqError: if the node is drained
 615
 616   """
 617   if lu.cfg.GetNodeInfo(node).drained:
 618     raise errors.OpPrereqError("Can't use drained node %s" % node,
 619                                errors.ECODE_INVAL)
 620
 621
 622 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 623   """Ensure that a node supports a given OS.
 624
 625   @param lu: the LU on behalf of which we make the check
 626   @param node: the node to check
 627   @param os_name: the OS to query about
 628   @param force_variant: whether to ignore variant errors
 629   @raise errors.OpPrereqError: if the node is not supporting the OS
 630
 631   """
 632   result = lu.rpc.call_os_get(node, os_name)
 633   result.Raise("OS '%s' not in supported OS list for node %s" %
 634                (os_name, node),
 635                prereq=True, ecode=errors.ECODE_INVAL)
 636   if not force_variant:
 637     _CheckOSVariant(result.payload, os_name)
 638
 639
 640 def _RequireFileStorage():
 641   """Checks that file storage is enabled.
 642
 643   @raise errors.OpPrereqError: when file storage is disabled
 644
 645   """
 646   if not constants.ENABLE_FILE_STORAGE:
 647     raise errors.OpPrereqError("File storage disabled at configure time",
 648                                errors.ECODE_INVAL)
 649
 650
 651 def _CheckDiskTemplate(template):
 652   """Ensure a given disk template is valid.
 653
 654   """
 655   if template not in constants.DISK_TEMPLATES:
 656     msg = ("Invalid disk template name '%s', valid templates are: %s" %
 657            (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
 658     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 659   if template == constants.DT_FILE:
 660     _RequireFileStorage()
 661   return True
 662
 663
 664 def _CheckStorageType(storage_type):
 665   """Ensure a given storage type is valid.
 666
 667   """
 668   if storage_type not in constants.VALID_STORAGE_TYPES:
 669     raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
 670                                errors.ECODE_INVAL)
 671   if storage_type == constants.ST_FILE:
 672     _RequireFileStorage()
 673   return True
 674
 675
 676 def _GetClusterDomainSecret():
 677   """Reads the cluster domain secret.
 678
 679   """
 680   return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
 681                                strict=True)
 682
 683
 684 def _CheckInstanceDown(lu, instance, reason):
 685   """Ensure that an instance is not running."""
 686   if instance.admin_up:
 687     raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
 688                                (instance.name, reason), errors.ECODE_STATE)
 689
 690   pnode = instance.primary_node
 691   ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
 692   ins_l.Raise("Can't contact node %s for instance information" % pnode,
 693               prereq=True, ecode=errors.ECODE_ENVIRON)
 694
 695   if instance.name in ins_l.payload:
 696     raise errors.OpPrereqError("Instance %s is running, %s" %
 697                                (instance.name, reason), errors.ECODE_STATE)
 698
 699
 700 def _ExpandItemName(fn, name, kind):
 701   """Expand an item name.
 702
 703   @param fn: the function to use for expansion
 704   @param name: requested item name
 705   @param kind: text description ('Node' or 'Instance')
 706   @return: the resolved (full) name
 707   @raise errors.OpPrereqError: if the item is not found
 708
 709   """
 710   full_name = fn(name)
 711   if full_name is None:
 712     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
 713                                errors.ECODE_NOENT)
 714   return full_name
 715
 716
 717 def _ExpandNodeName(cfg, name):
 718   """Wrapper over L{_ExpandItemName} for nodes."""
 719   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
 720
 721
 722 def _ExpandInstanceName(cfg, name):
 723   """Wrapper over L{_ExpandItemName} for instance."""
 724   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
 725
 726
 727 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 728                           memory, vcpus, nics, disk_template, disks,
 729                           bep, hvp, hypervisor_name):
 730   """Builds instance related env variables for hooks
 731
 732   This builds the hook environment from individual variables.
 733
 734   @type name: string
 735   @param name: the name of the instance
 736   @type primary_node: string
 737   @param primary_node: the name of the instance's primary node
 738   @type secondary_nodes: list
 739   @param secondary_nodes: list of secondary nodes as strings
 740   @type os_type: string
 741   @param os_type: the name of the instance's OS
 742   @type status: boolean
 743   @param status: the should_run status of the instance
 744   @type memory: string
 745   @param memory: the memory size of the instance
 746   @type vcpus: string
 747   @param vcpus: the count of VCPUs the instance has
 748   @type nics: list
 749   @param nics: list of tuples (ip, mac, mode, link) representing
 750       the NICs the instance has
 751   @type disk_template: string
 752   @param disk_template: the disk template of the instance
 753   @type disks: list
 754   @param disks: the list of (size, mode) pairs
 755   @type bep: dict
 756   @param bep: the backend parameters for the instance
 757   @type hvp: dict
 758   @param hvp: the hypervisor parameters for the instance
 759   @type hypervisor_name: string
 760   @param hypervisor_name: the hypervisor for the instance
 761   @rtype: dict
 762   @return: the hook environment for this instance
 763
 764   """
 765   if status:
 766     str_status = "up"
 767   else:
 768     str_status = "down"
 769   env = {
 770     "OP_TARGET": name,
 771     "INSTANCE_NAME": name,
 772     "INSTANCE_PRIMARY": primary_node,
 773     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
 774     "INSTANCE_OS_TYPE": os_type,
 775     "INSTANCE_STATUS": str_status,
 776     "INSTANCE_MEMORY": memory,
 777     "INSTANCE_VCPUS": vcpus,
 778     "INSTANCE_DISK_TEMPLATE": disk_template,
 779     "INSTANCE_HYPERVISOR": hypervisor_name,
 780   }
 781
 782   if nics:
 783     nic_count = len(nics)
 784     for idx, (ip, mac, mode, link) in enumerate(nics):
 785       if ip is None:
 786         ip = ""
 787       env["INSTANCE_NIC%d_IP" % idx] = ip
 788       env["INSTANCE_NIC%d_MAC" % idx] = mac
 789       env["INSTANCE_NIC%d_MODE" % idx] = mode
 790       env["INSTANCE_NIC%d_LINK" % idx] = link
 791       if mode == constants.NIC_MODE_BRIDGED:
 792         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
 793   else:
 794     nic_count = 0
 795
 796   env["INSTANCE_NIC_COUNT"] = nic_count
 797
 798   if disks:
 799     disk_count = len(disks)
 800     for idx, (size, mode) in enumerate(disks):
 801       env["INSTANCE_DISK%d_SIZE" % idx] = size
 802       env["INSTANCE_DISK%d_MODE" % idx] = mode
 803   else:
 804     disk_count = 0
 805
 806   env["INSTANCE_DISK_COUNT"] = disk_count
 807
 808   for source, kind in [(bep, "BE"), (hvp, "HV")]:
 809     for key, value in source.items():
 810       env["INSTANCE_%s_%s" % (kind, key)] = value
 811
 812   return env
 813
 814
 815 def _NICListToTuple(lu, nics):
 816   """Build a list of nic information tuples.
 817
 818   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
 819   value in LUQueryInstanceData.
 820
 821   @type lu:  L{LogicalUnit}
 822   @param lu: the logical unit on whose behalf we execute
 823   @type nics: list of L{objects.NIC}
 824   @param nics: list of nics to convert to hooks tuples
 825
 826   """
 827   hooks_nics = []
 828   cluster = lu.cfg.GetClusterInfo()
 829   for nic in nics:
 830     ip = nic.ip
 831     mac = nic.mac
 832     filled_params = cluster.SimpleFillNIC(nic.nicparams)
 833     mode = filled_params[constants.NIC_MODE]
 834     link = filled_params[constants.NIC_LINK]
 835     hooks_nics.append((ip, mac, mode, link))
 836   return hooks_nics
 837
 838
 839 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
 840   """Builds instance related env variables for hooks from an object.
 841
 842   @type lu: L{LogicalUnit}
 843   @param lu: the logical unit on whose behalf we execute
 844   @type instance: L{objects.Instance}
 845   @param instance: the instance for which we should build the
 846       environment
 847   @type override: dict
 848   @param override: dictionary with key/values that will override
 849       our values
 850   @rtype: dict
 851   @return: the hook environment dictionary
 852
 853   """
 854   cluster = lu.cfg.GetClusterInfo()
 855   bep = cluster.FillBE(instance)
 856   hvp = cluster.FillHV(instance)
 857   args = {
 858     'name': instance.name,
 859     'primary_node': instance.primary_node,
 860     'secondary_nodes': instance.secondary_nodes,
 861     'os_type': instance.os,
 862     'status': instance.admin_up,
 863     'memory': bep[constants.BE_MEMORY],
 864     'vcpus': bep[constants.BE_VCPUS],
 865     'nics': _NICListToTuple(lu, instance.nics),
 866     'disk_template': instance.disk_template,
 867     'disks': [(disk.size, disk.mode) for disk in instance.disks],
 868     'bep': bep,
 869     'hvp': hvp,
 870     'hypervisor_name': instance.hypervisor,
 871   }
 872   if override:
 873     args.update(override)
 874   return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
 875
 876
 877 def _AdjustCandidatePool(lu, exceptions):
 878   """Adjust the candidate pool after node operations.
 879
 880   """
 881   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
 882   if mod_list:
 883     lu.LogInfo("Promoted nodes to master candidate role: %s",
 884                utils.CommaJoin(node.name for node in mod_list))
 885     for name in mod_list:
 886       lu.context.ReaddNode(name)
 887   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 888   if mc_now > mc_max:
 889     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
 890                (mc_now, mc_max))
 891
 892
 893 def _DecideSelfPromotion(lu, exceptions=None):
 894   """Decide whether I should promote myself as a master candidate.
 895
 896   """
 897   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
 898   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 899   # the new node will increase mc_max with one, so:
 900   mc_should = min(mc_should + 1, cp_size)
 901   return mc_now < mc_should
 902
 903
 904 def _CheckNicsBridgesExist(lu, target_nics, target_node):
 905   """Check that the brigdes needed by a list of nics exist.
 906
 907   """
 908   cluster = lu.cfg.GetClusterInfo()
 909   paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
 910   brlist = [params[constants.NIC_LINK] for params in paramslist
 911             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
 912   if brlist:
 913     result = lu.rpc.call_bridges_exist(target_node, brlist)
 914     result.Raise("Error checking bridges on destination node '%s'" %
 915                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
 916
 917
 918 def _CheckInstanceBridgesExist(lu, instance, node=None):
 919   """Check that the brigdes needed by an instance exist.
 920
 921   """
 922   if node is None:
 923     node = instance.primary_node
 924   _CheckNicsBridgesExist(lu, instance.nics, node)
 925
 926
 927 def _CheckOSVariant(os_obj, name):
 928   """Check whether an OS name conforms to the os variants specification.
 929
 930   @type os_obj: L{objects.OS}
 931   @param os_obj: OS object to check
 932   @type name: string
 933   @param name: OS name passed by the user, to check for validity
 934
 935   """
 936   if not os_obj.supported_variants:
 937     return
 938   variant = objects.OS.GetVariant(name)
 939   if not variant:
 940     raise errors.OpPrereqError("OS name must include a variant",
 941                                errors.ECODE_INVAL)
 942
 943   if variant not in os_obj.supported_variants:
 944     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
 945
 946
 947 def _GetNodeInstancesInner(cfg, fn):
 948   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
 949
 950
 951 def _GetNodeInstances(cfg, node_name):
 952   """Returns a list of all primary and secondary instances on a node.
 953
 954   """
 955
 956   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
 957
 958
 959 def _GetNodePrimaryInstances(cfg, node_name):
 960   """Returns primary instances on a node.
 961
 962   """
 963   return _GetNodeInstancesInner(cfg,
 964                                 lambda inst: node_name == inst.primary_node)
 965
 966
 967 def _GetNodeSecondaryInstances(cfg, node_name):
 968   """Returns secondary instances on a node.
 969
 970   """
 971   return _GetNodeInstancesInner(cfg,
 972                                 lambda inst: node_name in inst.secondary_nodes)
 973
 974
 975 def _GetStorageTypeArgs(cfg, storage_type):
 976   """Returns the arguments for a storage type.
 977
 978   """
 979   # Special case for file storage
 980   if storage_type == constants.ST_FILE:
 981     # storage.FileStorage wants a list of storage directories
 982     return [[cfg.GetFileStorageDir()]]
 983
 984   return []
 985
 986
 987 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
 988   faulty = []
 989
 990   for dev in instance.disks:
 991     cfg.SetDiskID(dev, node_name)
 992
 993   result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
 994   result.Raise("Failed to get disk status from node %s" % node_name,
 995                prereq=prereq, ecode=errors.ECODE_ENVIRON)
 996
 997   for idx, bdev_status in enumerate(result.payload):
 998     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
 999       faulty.append(idx)
1000
1001   return faulty
1002
1003
1004 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1005   """Check the sanity of iallocator and node arguments and use the
1006   cluster-wide iallocator if appropriate.
1007
1008   Check that at most one of (iallocator, node) is specified. If none is
1009   specified, then the LU's opcode's iallocator slot is filled with the
1010   cluster-wide default iallocator.
1011
1012   @type iallocator_slot: string
1013   @param iallocator_slot: the name of the opcode iallocator slot
1014   @type node_slot: string
1015   @param node_slot: the name of the opcode target node slot
1016
1017   """
1018   node = getattr(lu.op, node_slot, None)
1019   iallocator = getattr(lu.op, iallocator_slot, None)
1020
1021   if node is not None and iallocator is not None:
1022     raise errors.OpPrereqError("Do not specify both, iallocator and node.",
1023                                errors.ECODE_INVAL)
1024   elif node is None and iallocator is None:
1025     default_iallocator = lu.cfg.GetDefaultIAllocator()
1026     if default_iallocator:
1027       setattr(lu.op, iallocator_slot, default_iallocator)
1028     else:
1029       raise errors.OpPrereqError("No iallocator or node given and no"
1030                                  " cluster-wide default iallocator found."
1031                                  " Please specify either an iallocator or a"
1032                                  " node, or set a cluster-wide default"
1033                                  " iallocator.")
1034
1035
1036 class LUPostInitCluster(LogicalUnit):
1037   """Logical unit for running hooks after cluster initialization.
1038
1039   """
1040   HPATH = "cluster-init"
1041   HTYPE = constants.HTYPE_CLUSTER
1042
1043   def BuildHooksEnv(self):
1044     """Build hooks env.
1045
1046     """
1047     env = {"OP_TARGET": self.cfg.GetClusterName()}
1048     mn = self.cfg.GetMasterNode()
1049     return env, [], [mn]
1050
1051   def Exec(self, feedback_fn):
1052     """Nothing to do.
1053
1054     """
1055     return True
1056
1057
1058 class LUDestroyCluster(LogicalUnit):
1059   """Logical unit for destroying the cluster.
1060
1061   """
1062   HPATH = "cluster-destroy"
1063   HTYPE = constants.HTYPE_CLUSTER
1064
1065   def BuildHooksEnv(self):
1066     """Build hooks env.
1067
1068     """
1069     env = {"OP_TARGET": self.cfg.GetClusterName()}
1070     return env, [], []
1071
1072   def CheckPrereq(self):
1073     """Check prerequisites.
1074
1075     This checks whether the cluster is empty.
1076
1077     Any errors are signaled by raising errors.OpPrereqError.
1078
1079     """
1080     master = self.cfg.GetMasterNode()
1081
1082     nodelist = self.cfg.GetNodeList()
1083     if len(nodelist) != 1 or nodelist[0] != master:
1084       raise errors.OpPrereqError("There are still %d node(s) in"
1085                                  " this cluster." % (len(nodelist) - 1),
1086                                  errors.ECODE_INVAL)
1087     instancelist = self.cfg.GetInstanceList()
1088     if instancelist:
1089       raise errors.OpPrereqError("There are still %d instance(s) in"
1090                                  " this cluster." % len(instancelist),
1091                                  errors.ECODE_INVAL)
1092
1093   def Exec(self, feedback_fn):
1094     """Destroys the cluster.
1095
1096     """
1097     master = self.cfg.GetMasterNode()
1098
1099     # Run post hooks on master node before it's removed
1100     hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1101     try:
1102       hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1103     except:
1104       # pylint: disable-msg=W0702
1105       self.LogWarning("Errors occurred running hooks on %s" % master)
1106
1107     result = self.rpc.call_node_stop_master(master, False)
1108     result.Raise("Could not disable the master role")
1109
1110     return master
1111
1112
1113 def _VerifyCertificate(filename):
1114   """Verifies a certificate for LUVerifyCluster.
1115
1116   @type filename: string
1117   @param filename: Path to PEM file
1118
1119   """
1120   try:
1121     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1122                                            utils.ReadFile(filename))
1123   except Exception, err: # pylint: disable-msg=W0703
1124     return (LUVerifyCluster.ETYPE_ERROR,
1125             "Failed to load X509 certificate %s: %s" % (filename, err))
1126
1127   (errcode, msg) = \
1128     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1129                                 constants.SSL_CERT_EXPIRATION_ERROR)
1130
1131   if msg:
1132     fnamemsg = "While verifying %s: %s" % (filename, msg)
1133   else:
1134     fnamemsg = None
1135
1136   if errcode is None:
1137     return (None, fnamemsg)
1138   elif errcode == utils.CERT_WARNING:
1139     return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1140   elif errcode == utils.CERT_ERROR:
1141     return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1142
1143   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1144
1145
1146 class LUVerifyCluster(LogicalUnit):
1147   """Verifies the cluster status.
1148
1149   """
1150   HPATH = "cluster-verify"
1151   HTYPE = constants.HTYPE_CLUSTER
1152   _OP_PARAMS = [
1153     ("skip_checks", ht.EmptyList,
1154      ht.TListOf(ht.TElemOf(constants.VERIFY_OPTIONAL_CHECKS))),
1155     ("verbose", False, ht.TBool),
1156     ("error_codes", False, ht.TBool),
1157     ("debug_simulate_errors", False, ht.TBool),
1158     ]
1159   REQ_BGL = False
1160
1161   TCLUSTER = "cluster"
1162   TNODE = "node"
1163   TINSTANCE = "instance"
1164
1165   ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1166   ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1167   EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1168   EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1169   EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1170   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1171   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1172   EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1173   ENODEDRBD = (TNODE, "ENODEDRBD")
1174   ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1175   ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1176   ENODEHOOKS = (TNODE, "ENODEHOOKS")
1177   ENODEHV = (TNODE, "ENODEHV")
1178   ENODELVM = (TNODE, "ENODELVM")
1179   ENODEN1 = (TNODE, "ENODEN1")
1180   ENODENET = (TNODE, "ENODENET")
1181   ENODEOS = (TNODE, "ENODEOS")
1182   ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1183   ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1184   ENODERPC = (TNODE, "ENODERPC")
1185   ENODESSH = (TNODE, "ENODESSH")
1186   ENODEVERSION = (TNODE, "ENODEVERSION")
1187   ENODESETUP = (TNODE, "ENODESETUP")
1188   ENODETIME = (TNODE, "ENODETIME")
1189
1190   ETYPE_FIELD = "code"
1191   ETYPE_ERROR = "ERROR"
1192   ETYPE_WARNING = "WARNING"
1193
1194   class NodeImage(object):
1195     """A class representing the logical and physical status of a node.
1196
1197     @type name: string
1198     @ivar name: the node name to which this object refers
1199     @ivar volumes: a structure as returned from
1200         L{ganeti.backend.GetVolumeList} (runtime)
1201     @ivar instances: a list of running instances (runtime)
1202     @ivar pinst: list of configured primary instances (config)
1203     @ivar sinst: list of configured secondary instances (config)
1204     @ivar sbp: diction of {secondary-node: list of instances} of all peers
1205         of this node (config)
1206     @ivar mfree: free memory, as reported by hypervisor (runtime)
1207     @ivar dfree: free disk, as reported by the node (runtime)
1208     @ivar offline: the offline status (config)
1209     @type rpc_fail: boolean
1210     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1211         not whether the individual keys were correct) (runtime)
1212     @type lvm_fail: boolean
1213     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1214     @type hyp_fail: boolean
1215     @ivar hyp_fail: whether the RPC call didn't return the instance list
1216     @type ghost: boolean
1217     @ivar ghost: whether this is a known node or not (config)
1218     @type os_fail: boolean
1219     @ivar os_fail: whether the RPC call didn't return valid OS data
1220     @type oslist: list
1221     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1222
1223     """
1224     def __init__(self, offline=False, name=None):
1225       self.name = name
1226       self.volumes = {}
1227       self.instances = []
1228       self.pinst = []
1229       self.sinst = []
1230       self.sbp = {}
1231       self.mfree = 0
1232       self.dfree = 0
1233       self.offline = offline
1234       self.rpc_fail = False
1235       self.lvm_fail = False
1236       self.hyp_fail = False
1237       self.ghost = False
1238       self.os_fail = False
1239       self.oslist = {}
1240
1241   def ExpandNames(self):
1242     self.needed_locks = {
1243       locking.LEVEL_NODE: locking.ALL_SET,
1244       locking.LEVEL_INSTANCE: locking.ALL_SET,
1245     }
1246     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1247
1248   def _Error(self, ecode, item, msg, *args, **kwargs):
1249     """Format an error message.
1250
1251     Based on the opcode's error_codes parameter, either format a
1252     parseable error code, or a simpler error string.
1253
1254     This must be called only from Exec and functions called from Exec.
1255
1256     """
1257     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1258     itype, etxt = ecode
1259     # first complete the msg
1260     if args:
1261       msg = msg % args
1262     # then format the whole message
1263     if self.op.error_codes:
1264       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1265     else:
1266       if item:
1267         item = " " + item
1268       else:
1269         item = ""
1270       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1271     # and finally report it via the feedback_fn
1272     self._feedback_fn("  - %s" % msg)
1273
1274   def _ErrorIf(self, cond, *args, **kwargs):
1275     """Log an error message if the passed condition is True.
1276
1277     """
1278     cond = bool(cond) or self.op.debug_simulate_errors
1279     if cond:
1280       self._Error(*args, **kwargs)
1281     # do not mark the operation as failed for WARN cases only
1282     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1283       self.bad = self.bad or cond
1284
1285   def _VerifyNode(self, ninfo, nresult):
1286     """Perform some basic validation on data returned from a node.
1287
1288       - check the result data structure is well formed and has all the
1289         mandatory fields
1290       - check ganeti version
1291
1292     @type ninfo: L{objects.Node}
1293     @param ninfo: the node to check
1294     @param nresult: the results from the node
1295     @rtype: boolean
1296     @return: whether overall this call was successful (and we can expect
1297          reasonable values in the respose)
1298
1299     """
1300     node = ninfo.name
1301     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1302
1303     # main result, nresult should be a non-empty dict
1304     test = not nresult or not isinstance(nresult, dict)
1305     _ErrorIf(test, self.ENODERPC, node,
1306                   "unable to verify node: no data returned")
1307     if test:
1308       return False
1309
1310     # compares ganeti version
1311     local_version = constants.PROTOCOL_VERSION
1312     remote_version = nresult.get("version", None)
1313     test = not (remote_version and
1314                 isinstance(remote_version, (list, tuple)) and
1315                 len(remote_version) == 2)
1316     _ErrorIf(test, self.ENODERPC, node,
1317              "connection to node returned invalid data")
1318     if test:
1319       return False
1320
1321     test = local_version != remote_version[0]
1322     _ErrorIf(test, self.ENODEVERSION, node,
1323              "incompatible protocol versions: master %s,"
1324              " node %s", local_version, remote_version[0])
1325     if test:
1326       return False
1327
1328     # node seems compatible, we can actually try to look into its results
1329
1330     # full package version
1331     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1332                   self.ENODEVERSION, node,
1333                   "software version mismatch: master %s, node %s",
1334                   constants.RELEASE_VERSION, remote_version[1],
1335                   code=self.ETYPE_WARNING)
1336
1337     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1338     if isinstance(hyp_result, dict):
1339       for hv_name, hv_result in hyp_result.iteritems():
1340         test = hv_result is not None
1341         _ErrorIf(test, self.ENODEHV, node,
1342                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1343
1344
1345     test = nresult.get(constants.NV_NODESETUP,
1346                            ["Missing NODESETUP results"])
1347     _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1348              "; ".join(test))
1349
1350     return True
1351
1352   def _VerifyNodeTime(self, ninfo, nresult,
1353                       nvinfo_starttime, nvinfo_endtime):
1354     """Check the node time.
1355
1356     @type ninfo: L{objects.Node}
1357     @param ninfo: the node to check
1358     @param nresult: the remote results for the node
1359     @param nvinfo_starttime: the start time of the RPC call
1360     @param nvinfo_endtime: the end time of the RPC call
1361
1362     """
1363     node = ninfo.name
1364     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1365
1366     ntime = nresult.get(constants.NV_TIME, None)
1367     try:
1368       ntime_merged = utils.MergeTime(ntime)
1369     except (ValueError, TypeError):
1370       _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1371       return
1372
1373     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1374       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1375     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1376       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1377     else:
1378       ntime_diff = None
1379
1380     _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1381              "Node time diverges by at least %s from master node time",
1382              ntime_diff)
1383
1384   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1385     """Check the node time.
1386
1387     @type ninfo: L{objects.Node}
1388     @param ninfo: the node to check
1389     @param nresult: the remote results for the node
1390     @param vg_name: the configured VG name
1391
1392     """
1393     if vg_name is None:
1394       return
1395
1396     node = ninfo.name
1397     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1398
1399     # checks vg existence and size > 20G
1400     vglist = nresult.get(constants.NV_VGLIST, None)
1401     test = not vglist
1402     _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1403     if not test:
1404       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1405                                             constants.MIN_VG_SIZE)
1406       _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1407
1408     # check pv names
1409     pvlist = nresult.get(constants.NV_PVLIST, None)
1410     test = pvlist is None
1411     _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1412     if not test:
1413       # check that ':' is not present in PV names, since it's a
1414       # special character for lvcreate (denotes the range of PEs to
1415       # use on the PV)
1416       for _, pvname, owner_vg in pvlist:
1417         test = ":" in pvname
1418         _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1419                  " '%s' of VG '%s'", pvname, owner_vg)
1420
1421   def _VerifyNodeNetwork(self, ninfo, nresult):
1422     """Check the node time.
1423
1424     @type ninfo: L{objects.Node}
1425     @param ninfo: the node to check
1426     @param nresult: the remote results for the node
1427
1428     """
1429     node = ninfo.name
1430     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1431
1432     test = constants.NV_NODELIST not in nresult
1433     _ErrorIf(test, self.ENODESSH, node,
1434              "node hasn't returned node ssh connectivity data")
1435     if not test:
1436       if nresult[constants.NV_NODELIST]:
1437         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1438           _ErrorIf(True, self.ENODESSH, node,
1439                    "ssh communication with node '%s': %s", a_node, a_msg)
1440
1441     test = constants.NV_NODENETTEST not in nresult
1442     _ErrorIf(test, self.ENODENET, node,
1443              "node hasn't returned node tcp connectivity data")
1444     if not test:
1445       if nresult[constants.NV_NODENETTEST]:
1446         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1447         for anode in nlist:
1448           _ErrorIf(True, self.ENODENET, node,
1449                    "tcp communication with node '%s': %s",
1450                    anode, nresult[constants.NV_NODENETTEST][anode])
1451
1452     test = constants.NV_MASTERIP not in nresult
1453     _ErrorIf(test, self.ENODENET, node,
1454              "node hasn't returned node master IP reachability data")
1455     if not test:
1456       if not nresult[constants.NV_MASTERIP]:
1457         if node == self.master_node:
1458           msg = "the master node cannot reach the master IP (not configured?)"
1459         else:
1460           msg = "cannot reach the master IP"
1461         _ErrorIf(True, self.ENODENET, node, msg)
1462
1463
1464   def _VerifyInstance(self, instance, instanceconfig, node_image):
1465     """Verify an instance.
1466
1467     This function checks to see if the required block devices are
1468     available on the instance's node.
1469
1470     """
1471     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1472     node_current = instanceconfig.primary_node
1473
1474     node_vol_should = {}
1475     instanceconfig.MapLVsByNode(node_vol_should)
1476
1477     for node in node_vol_should:
1478       n_img = node_image[node]
1479       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1480         # ignore missing volumes on offline or broken nodes
1481         continue
1482       for volume in node_vol_should[node]:
1483         test = volume not in n_img.volumes
1484         _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1485                  "volume %s missing on node %s", volume, node)
1486
1487     if instanceconfig.admin_up:
1488       pri_img = node_image[node_current]
1489       test = instance not in pri_img.instances and not pri_img.offline
1490       _ErrorIf(test, self.EINSTANCEDOWN, instance,
1491                "instance not running on its primary node %s",
1492                node_current)
1493
1494     for node, n_img in node_image.items():
1495       if (not node == node_current):
1496         test = instance in n_img.instances
1497         _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1498                  "instance should not run on node %s", node)
1499
1500   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1501     """Verify if there are any unknown volumes in the cluster.
1502
1503     The .os, .swap and backup volumes are ignored. All other volumes are
1504     reported as unknown.
1505
1506     @type reserved: L{ganeti.utils.FieldSet}
1507     @param reserved: a FieldSet of reserved volume names
1508
1509     """
1510     for node, n_img in node_image.items():
1511       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1512         # skip non-healthy nodes
1513         continue
1514       for volume in n_img.volumes:
1515         test = ((node not in node_vol_should or
1516                 volume not in node_vol_should[node]) and
1517                 not reserved.Matches(volume))
1518         self._ErrorIf(test, self.ENODEORPHANLV, node,
1519                       "volume %s is unknown", volume)
1520
1521   def _VerifyOrphanInstances(self, instancelist, node_image):
1522     """Verify the list of running instances.
1523
1524     This checks what instances are running but unknown to the cluster.
1525
1526     """
1527     for node, n_img in node_image.items():
1528       for o_inst in n_img.instances:
1529         test = o_inst not in instancelist
1530         self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1531                       "instance %s on node %s should not exist", o_inst, node)
1532
1533   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1534     """Verify N+1 Memory Resilience.
1535
1536     Check that if one single node dies we can still start all the
1537     instances it was primary for.
1538
1539     """
1540     for node, n_img in node_image.items():
1541       # This code checks that every node which is now listed as
1542       # secondary has enough memory to host all instances it is
1543       # supposed to should a single other node in the cluster fail.
1544       # FIXME: not ready for failover to an arbitrary node
1545       # FIXME: does not support file-backed instances
1546       # WARNING: we currently take into account down instances as well
1547       # as up ones, considering that even if they're down someone
1548       # might want to start them even in the event of a node failure.
1549       for prinode, instances in n_img.sbp.items():
1550         needed_mem = 0
1551         for instance in instances:
1552           bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1553           if bep[constants.BE_AUTO_BALANCE]:
1554             needed_mem += bep[constants.BE_MEMORY]
1555         test = n_img.mfree < needed_mem
1556         self._ErrorIf(test, self.ENODEN1, node,
1557                       "not enough memory on to accommodate"
1558                       " failovers should peer node %s fail", prinode)
1559
1560   def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1561                        master_files):
1562     """Verifies and computes the node required file checksums.
1563
1564     @type ninfo: L{objects.Node}
1565     @param ninfo: the node to check
1566     @param nresult: the remote results for the node
1567     @param file_list: required list of files
1568     @param local_cksum: dictionary of local files and their checksums
1569     @param master_files: list of files that only masters should have
1570
1571     """
1572     node = ninfo.name
1573     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1574
1575     remote_cksum = nresult.get(constants.NV_FILELIST, None)
1576     test = not isinstance(remote_cksum, dict)
1577     _ErrorIf(test, self.ENODEFILECHECK, node,
1578              "node hasn't returned file checksum data")
1579     if test:
1580       return
1581
1582     for file_name in file_list:
1583       node_is_mc = ninfo.master_candidate
1584       must_have = (file_name not in master_files) or node_is_mc
1585       # missing
1586       test1 = file_name not in remote_cksum
1587       # invalid checksum
1588       test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1589       # existing and good
1590       test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1591       _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1592                "file '%s' missing", file_name)
1593       _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1594                "file '%s' has wrong checksum", file_name)
1595       # not candidate and this is not a must-have file
1596       _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1597                "file '%s' should not exist on non master"
1598                " candidates (and the file is outdated)", file_name)
1599       # all good, except non-master/non-must have combination
1600       _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1601                "file '%s' should not exist"
1602                " on non master candidates", file_name)
1603
1604   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1605                       drbd_map):
1606     """Verifies and the node DRBD status.
1607
1608     @type ninfo: L{objects.Node}
1609     @param ninfo: the node to check
1610     @param nresult: the remote results for the node
1611     @param instanceinfo: the dict of instances
1612     @param drbd_helper: the configured DRBD usermode helper
1613     @param drbd_map: the DRBD map as returned by
1614         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1615
1616     """
1617     node = ninfo.name
1618     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1619
1620     if drbd_helper:
1621       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1622       test = (helper_result == None)
1623       _ErrorIf(test, self.ENODEDRBDHELPER, node,
1624                "no drbd usermode helper returned")
1625       if helper_result:
1626         status, payload = helper_result
1627         test = not status
1628         _ErrorIf(test, self.ENODEDRBDHELPER, node,
1629                  "drbd usermode helper check unsuccessful: %s", payload)
1630         test = status and (payload != drbd_helper)
1631         _ErrorIf(test, self.ENODEDRBDHELPER, node,
1632                  "wrong drbd usermode helper: %s", payload)
1633
1634     # compute the DRBD minors
1635     node_drbd = {}
1636     for minor, instance in drbd_map[node].items():
1637       test = instance not in instanceinfo
1638       _ErrorIf(test, self.ECLUSTERCFG, None,
1639                "ghost instance '%s' in temporary DRBD map", instance)
1640         # ghost instance should not be running, but otherwise we
1641         # don't give double warnings (both ghost instance and
1642         # unallocated minor in use)
1643       if test:
1644         node_drbd[minor] = (instance, False)
1645       else:
1646         instance = instanceinfo[instance]
1647         node_drbd[minor] = (instance.name, instance.admin_up)
1648
1649     # and now check them
1650     used_minors = nresult.get(constants.NV_DRBDLIST, [])
1651     test = not isinstance(used_minors, (tuple, list))
1652     _ErrorIf(test, self.ENODEDRBD, node,
1653              "cannot parse drbd status file: %s", str(used_minors))
1654     if test:
1655       # we cannot check drbd status
1656       return
1657
1658     for minor, (iname, must_exist) in node_drbd.items():
1659       test = minor not in used_minors and must_exist
1660       _ErrorIf(test, self.ENODEDRBD, node,
1661                "drbd minor %d of instance %s is not active", minor, iname)
1662     for minor in used_minors:
1663       test = minor not in node_drbd
1664       _ErrorIf(test, self.ENODEDRBD, node,
1665                "unallocated drbd minor %d is in use", minor)
1666
1667   def _UpdateNodeOS(self, ninfo, nresult, nimg):
1668     """Builds the node OS structures.
1669
1670     @type ninfo: L{objects.Node}
1671     @param ninfo: the node to check
1672     @param nresult: the remote results for the node
1673     @param nimg: the node image object
1674
1675     """
1676     node = ninfo.name
1677     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1678
1679     remote_os = nresult.get(constants.NV_OSLIST, None)
1680     test = (not isinstance(remote_os, list) or
1681             not compat.all(isinstance(v, list) and len(v) == 7
1682                            for v in remote_os))
1683
1684     _ErrorIf(test, self.ENODEOS, node,
1685              "node hasn't returned valid OS data")
1686
1687     nimg.os_fail = test
1688
1689     if test:
1690       return
1691
1692     os_dict = {}
1693
1694     for (name, os_path, status, diagnose,
1695          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1696
1697       if name not in os_dict:
1698         os_dict[name] = []
1699
1700       # parameters is a list of lists instead of list of tuples due to
1701       # JSON lacking a real tuple type, fix it:
1702       parameters = [tuple(v) for v in parameters]
1703       os_dict[name].append((os_path, status, diagnose,
1704                             set(variants), set(parameters), set(api_ver)))
1705
1706     nimg.oslist = os_dict
1707
1708   def _VerifyNodeOS(self, ninfo, nimg, base):
1709     """Verifies the node OS list.
1710
1711     @type ninfo: L{objects.Node}
1712     @param ninfo: the node to check
1713     @param nimg: the node image object
1714     @param base: the 'template' node we match against (e.g. from the master)
1715
1716     """
1717     node = ninfo.name
1718     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1719
1720     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1721
1722     for os_name, os_data in nimg.oslist.items():
1723       assert os_data, "Empty OS status for OS %s?!" % os_name
1724       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1725       _ErrorIf(not f_status, self.ENODEOS, node,
1726                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1727       _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1728                "OS '%s' has multiple entries (first one shadows the rest): %s",
1729                os_name, utils.CommaJoin([v[0] for v in os_data]))
1730       # this will catched in backend too
1731       _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1732                and not f_var, self.ENODEOS, node,
1733                "OS %s with API at least %d does not declare any variant",
1734                os_name, constants.OS_API_V15)
1735       # comparisons with the 'base' image
1736       test = os_name not in base.oslist
1737       _ErrorIf(test, self.ENODEOS, node,
1738                "Extra OS %s not present on reference node (%s)",
1739                os_name, base.name)
1740       if test:
1741         continue
1742       assert base.oslist[os_name], "Base node has empty OS status?"
1743       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1744       if not b_status:
1745         # base OS is invalid, skipping
1746         continue
1747       for kind, a, b in [("API version", f_api, b_api),
1748                          ("variants list", f_var, b_var),
1749                          ("parameters", f_param, b_param)]:
1750         _ErrorIf(a != b, self.ENODEOS, node,
1751                  "OS %s %s differs from reference node %s: %s vs. %s",
1752                  kind, os_name, base.name,
1753                  utils.CommaJoin(a), utils.CommaJoin(b))
1754
1755     # check any missing OSes
1756     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1757     _ErrorIf(missing, self.ENODEOS, node,
1758              "OSes present on reference node %s but missing on this node: %s",
1759              base.name, utils.CommaJoin(missing))
1760
1761   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1762     """Verifies and updates the node volume data.
1763
1764     This function will update a L{NodeImage}'s internal structures
1765     with data from the remote call.
1766
1767     @type ninfo: L{objects.Node}
1768     @param ninfo: the node to check
1769     @param nresult: the remote results for the node
1770     @param nimg: the node image object
1771     @param vg_name: the configured VG name
1772
1773     """
1774     node = ninfo.name
1775     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1776
1777     nimg.lvm_fail = True
1778     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1779     if vg_name is None:
1780       pass
1781     elif isinstance(lvdata, basestring):
1782       _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1783                utils.SafeEncode(lvdata))
1784     elif not isinstance(lvdata, dict):
1785       _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1786     else:
1787       nimg.volumes = lvdata
1788       nimg.lvm_fail = False
1789
1790   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1791     """Verifies and updates the node instance list.
1792
1793     If the listing was successful, then updates this node's instance
1794     list. Otherwise, it marks the RPC call as failed for the instance
1795     list key.
1796
1797     @type ninfo: L{objects.Node}
1798     @param ninfo: the node to check
1799     @param nresult: the remote results for the node
1800     @param nimg: the node image object
1801
1802     """
1803     idata = nresult.get(constants.NV_INSTANCELIST, None)
1804     test = not isinstance(idata, list)
1805     self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1806                   " (instancelist): %s", utils.SafeEncode(str(idata)))
1807     if test:
1808       nimg.hyp_fail = True
1809     else:
1810       nimg.instances = idata
1811
1812   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1813     """Verifies and computes a node information map
1814
1815     @type ninfo: L{objects.Node}
1816     @param ninfo: the node to check
1817     @param nresult: the remote results for the node
1818     @param nimg: the node image object
1819     @param vg_name: the configured VG name
1820
1821     """
1822     node = ninfo.name
1823     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1824
1825     # try to read free memory (from the hypervisor)
1826     hv_info = nresult.get(constants.NV_HVINFO, None)
1827     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1828     _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1829     if not test:
1830       try:
1831         nimg.mfree = int(hv_info["memory_free"])
1832       except (ValueError, TypeError):
1833         _ErrorIf(True, self.ENODERPC, node,
1834                  "node returned invalid nodeinfo, check hypervisor")
1835
1836     # FIXME: devise a free space model for file based instances as well
1837     if vg_name is not None:
1838       test = (constants.NV_VGLIST not in nresult or
1839               vg_name not in nresult[constants.NV_VGLIST])
1840       _ErrorIf(test, self.ENODELVM, node,
1841                "node didn't return data for the volume group '%s'"
1842                " - it is either missing or broken", vg_name)
1843       if not test:
1844         try:
1845           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1846         except (ValueError, TypeError):
1847           _ErrorIf(True, self.ENODERPC, node,
1848                    "node returned invalid LVM info, check LVM status")
1849
1850   def BuildHooksEnv(self):
1851     """Build hooks env.
1852
1853     Cluster-Verify hooks just ran in the post phase and their failure makes
1854     the output be logged in the verify output and the verification to fail.
1855
1856     """
1857     all_nodes = self.cfg.GetNodeList()
1858     env = {
1859       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1860       }
1861     for node in self.cfg.GetAllNodesInfo().values():
1862       env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1863
1864     return env, [], all_nodes
1865
1866   def Exec(self, feedback_fn):
1867     """Verify integrity of cluster, performing various test on nodes.
1868
1869     """
1870     self.bad = False
1871     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1872     verbose = self.op.verbose
1873     self._feedback_fn = feedback_fn
1874     feedback_fn("* Verifying global settings")
1875     for msg in self.cfg.VerifyConfig():
1876       _ErrorIf(True, self.ECLUSTERCFG, None, msg)
1877
1878     # Check the cluster certificates
1879     for cert_filename in constants.ALL_CERT_FILES:
1880       (errcode, msg) = _VerifyCertificate(cert_filename)
1881       _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1882
1883     vg_name = self.cfg.GetVGName()
1884     drbd_helper = self.cfg.GetDRBDHelper()
1885     hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1886     cluster = self.cfg.GetClusterInfo()
1887     nodelist = utils.NiceSort(self.cfg.GetNodeList())
1888     nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1889     instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1890     instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1891                         for iname in instancelist)
1892     i_non_redundant = [] # Non redundant instances
1893     i_non_a_balanced = [] # Non auto-balanced instances
1894     n_offline = 0 # Count of offline nodes
1895     n_drained = 0 # Count of nodes being drained
1896     node_vol_should = {}
1897
1898     # FIXME: verify OS list
1899     # do local checksums
1900     master_files = [constants.CLUSTER_CONF_FILE]
1901     master_node = self.master_node = self.cfg.GetMasterNode()
1902     master_ip = self.cfg.GetMasterIP()
1903
1904     file_names = ssconf.SimpleStore().GetFileList()
1905     file_names.extend(constants.ALL_CERT_FILES)
1906     file_names.extend(master_files)
1907     if cluster.modify_etc_hosts:
1908       file_names.append(constants.ETC_HOSTS)
1909
1910     local_checksums = utils.FingerprintFiles(file_names)
1911
1912     feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1913     node_verify_param = {
1914       constants.NV_FILELIST: file_names,
1915       constants.NV_NODELIST: [node.name for node in nodeinfo
1916                               if not node.offline],
1917       constants.NV_HYPERVISOR: hypervisors,
1918       constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1919                                   node.secondary_ip) for node in nodeinfo
1920                                  if not node.offline],
1921       constants.NV_INSTANCELIST: hypervisors,
1922       constants.NV_VERSION: None,
1923       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1924       constants.NV_NODESETUP: None,
1925       constants.NV_TIME: None,
1926       constants.NV_MASTERIP: (master_node, master_ip),
1927       constants.NV_OSLIST: None,
1928       }
1929
1930     if vg_name is not None:
1931       node_verify_param[constants.NV_VGLIST] = None
1932       node_verify_param[constants.NV_LVLIST] = vg_name
1933       node_verify_param[constants.NV_PVLIST] = [vg_name]
1934       node_verify_param[constants.NV_DRBDLIST] = None
1935
1936     if drbd_helper:
1937       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
1938
1939     # Build our expected cluster state
1940     node_image = dict((node.name, self.NodeImage(offline=node.offline,
1941                                                  name=node.name))
1942                       for node in nodeinfo)
1943
1944     for instance in instancelist:
1945       inst_config = instanceinfo[instance]
1946
1947       for nname in inst_config.all_nodes:
1948         if nname not in node_image:
1949           # ghost node
1950           gnode = self.NodeImage(name=nname)
1951           gnode.ghost = True
1952           node_image[nname] = gnode
1953
1954       inst_config.MapLVsByNode(node_vol_should)
1955
1956       pnode = inst_config.primary_node
1957       node_image[pnode].pinst.append(instance)
1958
1959       for snode in inst_config.secondary_nodes:
1960         nimg = node_image[snode]
1961         nimg.sinst.append(instance)
1962         if pnode not in nimg.sbp:
1963           nimg.sbp[pnode] = []
1964         nimg.sbp[pnode].append(instance)
1965
1966     # At this point, we have the in-memory data structures complete,
1967     # except for the runtime information, which we'll gather next
1968
1969     # Due to the way our RPC system works, exact response times cannot be
1970     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
1971     # time before and after executing the request, we can at least have a time
1972     # window.
1973     nvinfo_starttime = time.time()
1974     all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
1975                                            self.cfg.GetClusterName())
1976     nvinfo_endtime = time.time()
1977
1978     all_drbd_map = self.cfg.ComputeDRBDMap()
1979
1980     feedback_fn("* Verifying node status")
1981
1982     refos_img = None
1983
1984     for node_i in nodeinfo:
1985       node = node_i.name
1986       nimg = node_image[node]
1987
1988       if node_i.offline:
1989         if verbose:
1990           feedback_fn("* Skipping offline node %s" % (node,))
1991         n_offline += 1
1992         continue
1993
1994       if node == master_node:
1995         ntype = "master"
1996       elif node_i.master_candidate:
1997         ntype = "master candidate"
1998       elif node_i.drained:
1999         ntype = "drained"
2000         n_drained += 1
2001       else:
2002         ntype = "regular"
2003       if verbose:
2004         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2005
2006       msg = all_nvinfo[node].fail_msg
2007       _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2008       if msg:
2009         nimg.rpc_fail = True
2010         continue
2011
2012       nresult = all_nvinfo[node].payload
2013
2014       nimg.call_ok = self._VerifyNode(node_i, nresult)
2015       self._VerifyNodeNetwork(node_i, nresult)
2016       self._VerifyNodeLVM(node_i, nresult, vg_name)
2017       self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
2018                             master_files)
2019       self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2020                            all_drbd_map)
2021       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2022
2023       self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2024       self._UpdateNodeInstances(node_i, nresult, nimg)
2025       self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2026       self._UpdateNodeOS(node_i, nresult, nimg)
2027       if not nimg.os_fail:
2028         if refos_img is None:
2029           refos_img = nimg
2030         self._VerifyNodeOS(node_i, nimg, refos_img)
2031
2032     feedback_fn("* Verifying instance status")
2033     for instance in instancelist:
2034       if verbose:
2035         feedback_fn("* Verifying instance %s" % instance)
2036       inst_config = instanceinfo[instance]
2037       self._VerifyInstance(instance, inst_config, node_image)
2038       inst_nodes_offline = []
2039
2040       pnode = inst_config.primary_node
2041       pnode_img = node_image[pnode]
2042       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2043                self.ENODERPC, pnode, "instance %s, connection to"
2044                " primary node failed", instance)
2045
2046       if pnode_img.offline:
2047         inst_nodes_offline.append(pnode)
2048
2049       # If the instance is non-redundant we cannot survive losing its primary
2050       # node, so we are not N+1 compliant. On the other hand we have no disk
2051       # templates with more than one secondary so that situation is not well
2052       # supported either.
2053       # FIXME: does not support file-backed instances
2054       if not inst_config.secondary_nodes:
2055         i_non_redundant.append(instance)
2056       _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2057                instance, "instance has multiple secondary nodes: %s",
2058                utils.CommaJoin(inst_config.secondary_nodes),
2059                code=self.ETYPE_WARNING)
2060
2061       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2062         i_non_a_balanced.append(instance)
2063
2064       for snode in inst_config.secondary_nodes:
2065         s_img = node_image[snode]
2066         _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2067                  "instance %s, connection to secondary node failed", instance)
2068
2069         if s_img.offline:
2070           inst_nodes_offline.append(snode)
2071
2072       # warn that the instance lives on offline nodes
2073       _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2074                "instance lives on offline node(s) %s",
2075                utils.CommaJoin(inst_nodes_offline))
2076       # ... or ghost nodes
2077       for node in inst_config.all_nodes:
2078         _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2079                  "instance lives on ghost node %s", node)
2080
2081     feedback_fn("* Verifying orphan volumes")
2082     reserved = utils.FieldSet(*cluster.reserved_lvs)
2083     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2084
2085     feedback_fn("* Verifying orphan instances")
2086     self._VerifyOrphanInstances(instancelist, node_image)
2087
2088     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2089       feedback_fn("* Verifying N+1 Memory redundancy")
2090       self._VerifyNPlusOneMemory(node_image, instanceinfo)
2091
2092     feedback_fn("* Other Notes")
2093     if i_non_redundant:
2094       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2095                   % len(i_non_redundant))
2096
2097     if i_non_a_balanced:
2098       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2099                   % len(i_non_a_balanced))
2100
2101     if n_offline:
2102       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2103
2104     if n_drained:
2105       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2106
2107     return not self.bad
2108
2109   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2110     """Analyze the post-hooks' result
2111
2112     This method analyses the hook result, handles it, and sends some
2113     nicely-formatted feedback back to the user.
2114
2115     @param phase: one of L{constants.HOOKS_PHASE_POST} or
2116         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2117     @param hooks_results: the results of the multi-node hooks rpc call
2118     @param feedback_fn: function used send feedback back to the caller
2119     @param lu_result: previous Exec result
2120     @return: the new Exec result, based on the previous result
2121         and hook results
2122
2123     """
2124     # We only really run POST phase hooks, and are only interested in
2125     # their results
2126     if phase == constants.HOOKS_PHASE_POST:
2127       # Used to change hooks' output to proper indentation
2128       indent_re = re.compile('^', re.M)
2129       feedback_fn("* Hooks Results")
2130       assert hooks_results, "invalid result from hooks"
2131
2132       for node_name in hooks_results:
2133         res = hooks_results[node_name]
2134         msg = res.fail_msg
2135         test = msg and not res.offline
2136         self._ErrorIf(test, self.ENODEHOOKS, node_name,
2137                       "Communication failure in hooks execution: %s", msg)
2138         if res.offline or msg:
2139           # No need to investigate payload if node is offline or gave an error.
2140           # override manually lu_result here as _ErrorIf only
2141           # overrides self.bad
2142           lu_result = 1
2143           continue
2144         for script, hkr, output in res.payload:
2145           test = hkr == constants.HKR_FAIL
2146           self._ErrorIf(test, self.ENODEHOOKS, node_name,
2147                         "Script %s failed, output:", script)
2148           if test:
2149             output = indent_re.sub('      ', output)
2150             feedback_fn("%s" % output)
2151             lu_result = 0
2152
2153       return lu_result
2154
2155
2156 class LUVerifyDisks(NoHooksLU):
2157   """Verifies the cluster disks status.
2158
2159   """
2160   REQ_BGL = False
2161
2162   def ExpandNames(self):
2163     self.needed_locks = {
2164       locking.LEVEL_NODE: locking.ALL_SET,
2165       locking.LEVEL_INSTANCE: locking.ALL_SET,
2166     }
2167     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2168
2169   def Exec(self, feedback_fn):
2170     """Verify integrity of cluster disks.
2171
2172     @rtype: tuple of three items
2173     @return: a tuple of (dict of node-to-node_error, list of instances
2174         which need activate-disks, dict of instance: (node, volume) for
2175         missing volumes
2176
2177     """
2178     result = res_nodes, res_instances, res_missing = {}, [], {}
2179
2180     vg_name = self.cfg.GetVGName()
2181     nodes = utils.NiceSort(self.cfg.GetNodeList())
2182     instances = [self.cfg.GetInstanceInfo(name)
2183                  for name in self.cfg.GetInstanceList()]
2184
2185     nv_dict = {}
2186     for inst in instances:
2187       inst_lvs = {}
2188       if (not inst.admin_up or
2189           inst.disk_template not in constants.DTS_NET_MIRROR):
2190         continue
2191       inst.MapLVsByNode(inst_lvs)
2192       # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2193       for node, vol_list in inst_lvs.iteritems():
2194         for vol in vol_list:
2195           nv_dict[(node, vol)] = inst
2196
2197     if not nv_dict:
2198       return result
2199
2200     node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2201
2202     for node in nodes:
2203       # node_volume
2204       node_res = node_lvs[node]
2205       if node_res.offline:
2206         continue
2207       msg = node_res.fail_msg
2208       if msg:
2209         logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2210         res_nodes[node] = msg
2211         continue
2212
2213       lvs = node_res.payload
2214       for lv_name, (_, _, lv_online) in lvs.items():
2215         inst = nv_dict.pop((node, lv_name), None)
2216         if (not lv_online and inst is not None
2217             and inst.name not in res_instances):
2218           res_instances.append(inst.name)
2219
2220     # any leftover items in nv_dict are missing LVs, let's arrange the
2221     # data better
2222     for key, inst in nv_dict.iteritems():
2223       if inst.name not in res_missing:
2224         res_missing[inst.name] = []
2225       res_missing[inst.name].append(key)
2226
2227     return result
2228
2229
2230 class LURepairDiskSizes(NoHooksLU):
2231   """Verifies the cluster disks sizes.
2232
2233   """
2234   _OP_PARAMS = [("instances", ht.EmptyList, ht.TListOf(ht.TNonEmptyString))]
2235   REQ_BGL = False
2236
2237   def ExpandNames(self):
2238     if self.op.instances:
2239       self.wanted_names = []
2240       for name in self.op.instances:
2241         full_name = _ExpandInstanceName(self.cfg, name)
2242         self.wanted_names.append(full_name)
2243       self.needed_locks = {
2244         locking.LEVEL_NODE: [],
2245         locking.LEVEL_INSTANCE: self.wanted_names,
2246         }
2247       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2248     else:
2249       self.wanted_names = None
2250       self.needed_locks = {
2251         locking.LEVEL_NODE: locking.ALL_SET,
2252         locking.LEVEL_INSTANCE: locking.ALL_SET,
2253         }
2254     self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2255
2256   def DeclareLocks(self, level):
2257     if level == locking.LEVEL_NODE and self.wanted_names is not None:
2258       self._LockInstancesNodes(primary_only=True)
2259
2260   def CheckPrereq(self):
2261     """Check prerequisites.
2262
2263     This only checks the optional instance list against the existing names.
2264
2265     """
2266     if self.wanted_names is None:
2267       self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2268
2269     self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2270                              in self.wanted_names]
2271
2272   def _EnsureChildSizes(self, disk):
2273     """Ensure children of the disk have the needed disk size.
2274
2275     This is valid mainly for DRBD8 and fixes an issue where the
2276     children have smaller disk size.
2277
2278     @param disk: an L{ganeti.objects.Disk} object
2279
2280     """
2281     if disk.dev_type == constants.LD_DRBD8:
2282       assert disk.children, "Empty children for DRBD8?"
2283       fchild = disk.children[0]
2284       mismatch = fchild.size < disk.size
2285       if mismatch:
2286         self.LogInfo("Child disk has size %d, parent %d, fixing",
2287                      fchild.size, disk.size)
2288         fchild.size = disk.size
2289
2290       # and we recurse on this child only, not on the metadev
2291       return self._EnsureChildSizes(fchild) or mismatch
2292     else:
2293       return False
2294
2295   def Exec(self, feedback_fn):
2296     """Verify the size of cluster disks.
2297
2298     """
2299     # TODO: check child disks too
2300     # TODO: check differences in size between primary/secondary nodes
2301     per_node_disks = {}
2302     for instance in self.wanted_instances:
2303       pnode = instance.primary_node
2304       if pnode not in per_node_disks:
2305         per_node_disks[pnode] = []
2306       for idx, disk in enumerate(instance.disks):
2307         per_node_disks[pnode].append((instance, idx, disk))
2308
2309     changed = []
2310     for node, dskl in per_node_disks.items():
2311       newl = [v[2].Copy() for v in dskl]
2312       for dsk in newl:
2313         self.cfg.SetDiskID(dsk, node)
2314       result = self.rpc.call_blockdev_getsizes(node, newl)
2315       if result.fail_msg:
2316         self.LogWarning("Failure in blockdev_getsizes call to node"
2317                         " %s, ignoring", node)
2318         continue
2319       if len(result.data) != len(dskl):
2320         self.LogWarning("Invalid result from node %s, ignoring node results",
2321                         node)
2322         continue
2323       for ((instance, idx, disk), size) in zip(dskl, result.data):
2324         if size is None:
2325           self.LogWarning("Disk %d of instance %s did not return size"
2326                           " information, ignoring", idx, instance.name)
2327           continue
2328         if not isinstance(size, (int, long)):
2329           self.LogWarning("Disk %d of instance %s did not return valid"
2330                           " size information, ignoring", idx, instance.name)
2331           continue
2332         size = size >> 20
2333         if size != disk.size:
2334           self.LogInfo("Disk %d of instance %s has mismatched size,"
2335                        " correcting: recorded %d, actual %d", idx,
2336                        instance.name, disk.size, size)
2337           disk.size = size
2338           self.cfg.Update(instance, feedback_fn)
2339           changed.append((instance.name, idx, size))
2340         if self._EnsureChildSizes(disk):
2341           self.cfg.Update(instance, feedback_fn)
2342           changed.append((instance.name, idx, disk.size))
2343     return changed
2344
2345
2346 class LURenameCluster(LogicalUnit):
2347   """Rename the cluster.
2348
2349   """
2350   HPATH = "cluster-rename"
2351   HTYPE = constants.HTYPE_CLUSTER
2352   _OP_PARAMS = [("name", ht.NoDefault, ht.TNonEmptyString)]
2353
2354   def BuildHooksEnv(self):
2355     """Build hooks env.
2356
2357     """
2358     env = {
2359       "OP_TARGET": self.cfg.GetClusterName(),
2360       "NEW_NAME": self.op.name,
2361       }
2362     mn = self.cfg.GetMasterNode()
2363     all_nodes = self.cfg.GetNodeList()
2364     return env, [mn], all_nodes
2365
2366   def CheckPrereq(self):
2367     """Verify that the passed name is a valid one.
2368
2369     """
2370     hostname = netutils.GetHostname(name=self.op.name,
2371                                     family=self.cfg.GetPrimaryIPFamily())
2372
2373     new_name = hostname.name
2374     self.ip = new_ip = hostname.ip
2375     old_name = self.cfg.GetClusterName()
2376     old_ip = self.cfg.GetMasterIP()
2377     if new_name == old_name and new_ip == old_ip:
2378       raise errors.OpPrereqError("Neither the name nor the IP address of the"
2379                                  " cluster has changed",
2380                                  errors.ECODE_INVAL)
2381     if new_ip != old_ip:
2382       if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2383         raise errors.OpPrereqError("The given cluster IP address (%s) is"
2384                                    " reachable on the network" %
2385                                    new_ip, errors.ECODE_NOTUNIQUE)
2386
2387     self.op.name = new_name
2388
2389   def Exec(self, feedback_fn):
2390     """Rename the cluster.
2391
2392     """
2393     clustername = self.op.name
2394     ip = self.ip
2395
2396     # shutdown the master IP
2397     master = self.cfg.GetMasterNode()
2398     result = self.rpc.call_node_stop_master(master, False)
2399     result.Raise("Could not disable the master role")
2400
2401     try:
2402       cluster = self.cfg.GetClusterInfo()
2403       cluster.cluster_name = clustername
2404       cluster.master_ip = ip
2405       self.cfg.Update(cluster, feedback_fn)
2406
2407       # update the known hosts file
2408       ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2409       node_list = self.cfg.GetNodeList()
2410       try:
2411         node_list.remove(master)
2412       except ValueError:
2413         pass
2414       result = self.rpc.call_upload_file(node_list,
2415                                          constants.SSH_KNOWN_HOSTS_FILE)
2416       for to_node, to_result in result.iteritems():
2417         msg = to_result.fail_msg
2418         if msg:
2419           msg = ("Copy of file %s to node %s failed: %s" %
2420                  (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2421           self.proc.LogWarning(msg)
2422
2423     finally:
2424       result = self.rpc.call_node_start_master(master, False, False)
2425       msg = result.fail_msg
2426       if msg:
2427         self.LogWarning("Could not re-enable the master role on"
2428                         " the master, please restart manually: %s", msg)
2429
2430     return clustername
2431
2432
2433 class LUSetClusterParams(LogicalUnit):
2434   """Change the parameters of the cluster.
2435
2436   """
2437   HPATH = "cluster-modify"
2438   HTYPE = constants.HTYPE_CLUSTER
2439   _OP_PARAMS = [
2440     ("vg_name", None, ht.TMaybeString),
2441     ("enabled_hypervisors", None,
2442      ht.TOr(ht.TAnd(ht.TListOf(ht.TElemOf(constants.HYPER_TYPES)), ht.TTrue),
2443             ht.TNone)),
2444     ("hvparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2445                               ht.TNone)),
2446     ("beparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2447                               ht.TNone)),
2448     ("os_hvp", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2449                             ht.TNone)),
2450     ("osparams", None, ht.TOr(ht.TDictOf(ht.TNonEmptyString, ht.TDict),
2451                               ht.TNone)),
2452     ("candidate_pool_size", None, ht.TOr(ht.TStrictPositiveInt, ht.TNone)),
2453     ("uid_pool", None, ht.NoType),
2454     ("add_uids", None, ht.NoType),
2455     ("remove_uids", None, ht.NoType),
2456     ("maintain_node_health", None, ht.TMaybeBool),
2457     ("nicparams", None, ht.TOr(ht.TDict, ht.TNone)),
2458     ("drbd_helper", None, ht.TOr(ht.TString, ht.TNone)),
2459     ("default_iallocator", None, ht.TMaybeString),
2460     ("reserved_lvs", None, ht.TOr(ht.TListOf(ht.TNonEmptyString), ht.TNone)),
2461     ("hidden_os", None, ht.TOr(ht.TListOf(\
2462           ht.TAnd(ht.TList,
2463                 ht.TIsLength(2),
2464                 ht.TMap(lambda v: v[0], ht.TElemOf(constants.DDMS_VALUES)))),
2465           ht.TNone)),
2466     ("blacklisted_os", None, ht.TOr(ht.TListOf(\
2467           ht.TAnd(ht.TList,
2468                 ht.TIsLength(2),
2469                 ht.TMap(lambda v: v[0], ht.TElemOf(constants.DDMS_VALUES)))),
2470           ht.TNone)),
2471     ]
2472   REQ_BGL = False
2473
2474   def CheckArguments(self):
2475     """Check parameters
2476
2477     """
2478     if self.op.uid_pool:
2479       uidpool.CheckUidPool(self.op.uid_pool)
2480
2481     if self.op.add_uids:
2482       uidpool.CheckUidPool(self.op.add_uids)
2483
2484     if self.op.remove_uids:
2485       uidpool.CheckUidPool(self.op.remove_uids)
2486
2487   def ExpandNames(self):
2488     # FIXME: in the future maybe other cluster params won't require checking on
2489     # all nodes to be modified.
2490     self.needed_locks = {
2491       locking.LEVEL_NODE: locking.ALL_SET,
2492     }
2493     self.share_locks[locking.LEVEL_NODE] = 1
2494
2495   def BuildHooksEnv(self):
2496     """Build hooks env.
2497
2498     """
2499     env = {
2500       "OP_TARGET": self.cfg.GetClusterName(),
2501       "NEW_VG_NAME": self.op.vg_name,
2502       }
2503     mn = self.cfg.GetMasterNode()
2504     return env, [mn], [mn]
2505
2506   def CheckPrereq(self):
2507     """Check prerequisites.
2508
2509     This checks whether the given params don't conflict and
2510     if the given volume group is valid.
2511
2512     """
2513     if self.op.vg_name is not None and not self.op.vg_name:
2514       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2515         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2516                                    " instances exist", errors.ECODE_INVAL)
2517
2518     if self.op.drbd_helper is not None and not self.op.drbd_helper:
2519       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2520         raise errors.OpPrereqError("Cannot disable drbd helper while"
2521                                    " drbd-based instances exist",
2522                                    errors.ECODE_INVAL)
2523
2524     node_list = self.acquired_locks[locking.LEVEL_NODE]
2525
2526     # if vg_name not None, checks given volume group on all nodes
2527     if self.op.vg_name:
2528       vglist = self.rpc.call_vg_list(node_list)
2529       for node in node_list:
2530         msg = vglist[node].fail_msg
2531         if msg:
2532           # ignoring down node
2533           self.LogWarning("Error while gathering data on node %s"
2534                           " (ignoring node): %s", node, msg)
2535           continue
2536         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2537                                               self.op.vg_name,
2538                                               constants.MIN_VG_SIZE)
2539         if vgstatus:
2540           raise errors.OpPrereqError("Error on node '%s': %s" %
2541                                      (node, vgstatus), errors.ECODE_ENVIRON)
2542
2543     if self.op.drbd_helper:
2544       # checks given drbd helper on all nodes
2545       helpers = self.rpc.call_drbd_helper(node_list)
2546       for node in node_list:
2547         ninfo = self.cfg.GetNodeInfo(node)
2548         if ninfo.offline:
2549           self.LogInfo("Not checking drbd helper on offline node %s", node)
2550           continue
2551         msg = helpers[node].fail_msg
2552         if msg:
2553           raise errors.OpPrereqError("Error checking drbd helper on node"
2554                                      " '%s': %s" % (node, msg),
2555                                      errors.ECODE_ENVIRON)
2556         node_helper = helpers[node].payload
2557         if node_helper != self.op.drbd_helper:
2558           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2559                                      (node, node_helper), errors.ECODE_ENVIRON)
2560
2561     self.cluster = cluster = self.cfg.GetClusterInfo()
2562     # validate params changes
2563     if self.op.beparams:
2564       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2565       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2566
2567     if self.op.nicparams:
2568       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2569       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2570       objects.NIC.CheckParameterSyntax(self.new_nicparams)
2571       nic_errors = []
2572
2573       # check all instances for consistency
2574       for instance in self.cfg.GetAllInstancesInfo().values():
2575         for nic_idx, nic in enumerate(instance.nics):
2576           params_copy = copy.deepcopy(nic.nicparams)
2577           params_filled = objects.FillDict(self.new_nicparams, params_copy)
2578
2579           # check parameter syntax
2580           try:
2581             objects.NIC.CheckParameterSyntax(params_filled)
2582           except errors.ConfigurationError, err:
2583             nic_errors.append("Instance %s, nic/%d: %s" %
2584                               (instance.name, nic_idx, err))
2585
2586           # if we're moving instances to routed, check that they have an ip
2587           target_mode = params_filled[constants.NIC_MODE]
2588           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2589             nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2590                               (instance.name, nic_idx))
2591       if nic_errors:
2592         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2593                                    "\n".join(nic_errors))
2594
2595     # hypervisor list/parameters
2596     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2597     if self.op.hvparams:
2598       for hv_name, hv_dict in self.op.hvparams.items():
2599         if hv_name not in self.new_hvparams:
2600           self.new_hvparams[hv_name] = hv_dict
2601         else:
2602           self.new_hvparams[hv_name].update(hv_dict)
2603
2604     # os hypervisor parameters
2605     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2606     if self.op.os_hvp:
2607       for os_name, hvs in self.op.os_hvp.items():
2608         if os_name not in self.new_os_hvp:
2609           self.new_os_hvp[os_name] = hvs
2610         else:
2611           for hv_name, hv_dict in hvs.items():
2612             if hv_name not in self.new_os_hvp[os_name]:
2613               self.new_os_hvp[os_name][hv_name] = hv_dict
2614             else:
2615               self.new_os_hvp[os_name][hv_name].update(hv_dict)
2616
2617     # os parameters
2618     self.new_osp = objects.FillDict(cluster.osparams, {})
2619     if self.op.osparams:
2620       for os_name, osp in self.op.osparams.items():
2621         if os_name not in self.new_osp:
2622           self.new_osp[os_name] = {}
2623
2624         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2625                                                   use_none=True)
2626
2627         if not self.new_osp[os_name]:
2628           # we removed all parameters
2629           del self.new_osp[os_name]
2630         else:
2631           # check the parameter validity (remote check)
2632           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2633                          os_name, self.new_osp[os_name])
2634
2635     # changes to the hypervisor list
2636     if self.op.enabled_hypervisors is not None:
2637       self.hv_list = self.op.enabled_hypervisors
2638       for hv in self.hv_list:
2639         # if the hypervisor doesn't already exist in the cluster
2640         # hvparams, we initialize it to empty, and then (in both
2641         # cases) we make sure to fill the defaults, as we might not
2642         # have a complete defaults list if the hypervisor wasn't
2643         # enabled before
2644         if hv not in new_hvp:
2645           new_hvp[hv] = {}
2646         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2647         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2648     else:
2649       self.hv_list = cluster.enabled_hypervisors
2650
2651     if self.op.hvparams or self.op.enabled_hypervisors is not None:
2652       # either the enabled list has changed, or the parameters have, validate
2653       for hv_name, hv_params in self.new_hvparams.items():
2654         if ((self.op.hvparams and hv_name in self.op.hvparams) or
2655             (self.op.enabled_hypervisors and
2656              hv_name in self.op.enabled_hypervisors)):
2657           # either this is a new hypervisor, or its parameters have changed
2658           hv_class = hypervisor.GetHypervisor(hv_name)
2659           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2660           hv_class.CheckParameterSyntax(hv_params)
2661           _CheckHVParams(self, node_list, hv_name, hv_params)
2662
2663     if self.op.os_hvp:
2664       # no need to check any newly-enabled hypervisors, since the
2665       # defaults have already been checked in the above code-block
2666       for os_name, os_hvp in self.new_os_hvp.items():
2667         for hv_name, hv_params in os_hvp.items():
2668           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2669           # we need to fill in the new os_hvp on top of the actual hv_p
2670           cluster_defaults = self.new_hvparams.get(hv_name, {})
2671           new_osp = objects.FillDict(cluster_defaults, hv_params)
2672           hv_class = hypervisor.GetHypervisor(hv_name)
2673           hv_class.CheckParameterSyntax(new_osp)
2674           _CheckHVParams(self, node_list, hv_name, new_osp)
2675
2676     if self.op.default_iallocator:
2677       alloc_script = utils.FindFile(self.op.default_iallocator,
2678                                     constants.IALLOCATOR_SEARCH_PATH,
2679                                     os.path.isfile)
2680       if alloc_script is None:
2681         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
2682                                    " specified" % self.op.default_iallocator,
2683                                    errors.ECODE_INVAL)
2684
2685   def Exec(self, feedback_fn):
2686     """Change the parameters of the cluster.
2687
2688     """
2689     if self.op.vg_name is not None:
2690       new_volume = self.op.vg_name
2691       if not new_volume:
2692         new_volume = None
2693       if new_volume != self.cfg.GetVGName():
2694         self.cfg.SetVGName(new_volume)
2695       else:
2696         feedback_fn("Cluster LVM configuration already in desired"
2697                     " state, not changing")
2698     if self.op.drbd_helper is not None:
2699       new_helper = self.op.drbd_helper
2700       if not new_helper:
2701         new_helper = None
2702       if new_helper != self.cfg.GetDRBDHelper():
2703         self.cfg.SetDRBDHelper(new_helper)
2704       else:
2705         feedback_fn("Cluster DRBD helper already in desired state,"
2706                     " not changing")
2707     if self.op.hvparams:
2708       self.cluster.hvparams = self.new_hvparams
2709     if self.op.os_hvp:
2710       self.cluster.os_hvp = self.new_os_hvp
2711     if self.op.enabled_hypervisors is not None:
2712       self.cluster.hvparams = self.new_hvparams
2713       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2714     if self.op.beparams:
2715       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2716     if self.op.nicparams:
2717       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2718     if self.op.osparams:
2719       self.cluster.osparams = self.new_osp
2720
2721     if self.op.candidate_pool_size is not None:
2722       self.cluster.candidate_pool_size = self.op.candidate_pool_size
2723       # we need to update the pool size here, otherwise the save will fail
2724       _AdjustCandidatePool(self, [])
2725
2726     if self.op.maintain_node_health is not None:
2727       self.cluster.maintain_node_health = self.op.maintain_node_health
2728
2729     if self.op.add_uids is not None:
2730       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2731
2732     if self.op.remove_uids is not None:
2733       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2734
2735     if self.op.uid_pool is not None:
2736       self.cluster.uid_pool = self.op.uid_pool
2737
2738     if self.op.default_iallocator is not None:
2739       self.cluster.default_iallocator = self.op.default_iallocator
2740
2741     if self.op.reserved_lvs is not None:
2742       self.cluster.reserved_lvs = self.op.reserved_lvs
2743
2744     def helper_os(aname, mods, desc):
2745       desc += " OS list"
2746       lst = getattr(self.cluster, aname)
2747       for key, val in mods:
2748         if key == constants.DDM_ADD:
2749           if val in lst:
2750             feedback_fn("OS %s already in %s, ignoring", val, desc)
2751           else:
2752             lst.append(val)
2753         elif key == constants.DDM_REMOVE:
2754           if val in lst:
2755             lst.remove(val)
2756           else:
2757             feedback_fn("OS %s not found in %s, ignoring", val, desc)
2758         else:
2759           raise errors.ProgrammerError("Invalid modification '%s'" % key)
2760
2761     if self.op.hidden_os:
2762       helper_os("hidden_os", self.op.hidden_os, "hidden")
2763
2764     if self.op.blacklisted_os:
2765       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
2766
2767     self.cfg.Update(self.cluster, feedback_fn)
2768
2769
2770 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2771   """Distribute additional files which are part of the cluster configuration.
2772
2773   ConfigWriter takes care of distributing the config and ssconf files, but
2774   there are more files which should be distributed to all nodes. This function
2775   makes sure those are copied.
2776
2777   @param lu: calling logical unit
2778   @param additional_nodes: list of nodes not in the config to distribute to
2779
2780   """
2781   # 1. Gather target nodes
2782   myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2783   dist_nodes = lu.cfg.GetOnlineNodeList()
2784   if additional_nodes is not None:
2785     dist_nodes.extend(additional_nodes)
2786   if myself.name in dist_nodes:
2787     dist_nodes.remove(myself.name)
2788
2789   # 2. Gather files to distribute
2790   dist_files = set([constants.ETC_HOSTS,
2791                     constants.SSH_KNOWN_HOSTS_FILE,
2792                     constants.RAPI_CERT_FILE,
2793                     constants.RAPI_USERS_FILE,
2794                     constants.CONFD_HMAC_KEY,
2795                     constants.CLUSTER_DOMAIN_SECRET_FILE,
2796                    ])
2797
2798   enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2799   for hv_name in enabled_hypervisors:
2800     hv_class = hypervisor.GetHypervisor(hv_name)
2801     dist_files.update(hv_class.GetAncillaryFiles())
2802
2803   # 3. Perform the files upload
2804   for fname in dist_files:
2805     if os.path.exists(fname):
2806       result = lu.rpc.call_upload_file(dist_nodes, fname)
2807       for to_node, to_result in result.items():
2808         msg = to_result.fail_msg
2809         if msg:
2810           msg = ("Copy of file %s to node %s failed: %s" %
2811                  (fname, to_node, msg))
2812           lu.proc.LogWarning(msg)
2813
2814
2815 class LURedistributeConfig(NoHooksLU):
2816   """Force the redistribution of cluster configuration.
2817
2818   This is a very simple LU.
2819
2820   """
2821   REQ_BGL = False
2822
2823   def ExpandNames(self):
2824     self.needed_locks = {
2825       locking.LEVEL_NODE: locking.ALL_SET,
2826     }
2827     self.share_locks[locking.LEVEL_NODE] = 1
2828
2829   def Exec(self, feedback_fn):
2830     """Redistribute the configuration.
2831
2832     """
2833     self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2834     _RedistributeAncillaryFiles(self)
2835
2836
2837 def _WaitForSync(lu, instance, disks=None, oneshot=False):
2838   """Sleep and poll for an instance's disk to sync.
2839
2840   """
2841   if not instance.disks or disks is not None and not disks:
2842     return True
2843
2844   disks = _ExpandCheckDisks(instance, disks)
2845
2846   if not oneshot:
2847     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2848
2849   node = instance.primary_node
2850
2851   for dev in disks:
2852     lu.cfg.SetDiskID(dev, node)
2853
2854   # TODO: Convert to utils.Retry
2855
2856   retries = 0
2857   degr_retries = 10 # in seconds, as we sleep 1 second each time
2858   while True:
2859     max_time = 0
2860     done = True
2861     cumul_degraded = False
2862     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
2863     msg = rstats.fail_msg
2864     if msg:
2865       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2866       retries += 1
2867       if retries >= 10:
2868         raise errors.RemoteError("Can't contact node %s for mirror data,"
2869                                  " aborting." % node)
2870       time.sleep(6)
2871       continue
2872     rstats = rstats.payload
2873     retries = 0
2874     for i, mstat in enumerate(rstats):
2875       if mstat is None:
2876         lu.LogWarning("Can't compute data for node %s/%s",
2877                            node, disks[i].iv_name)
2878         continue
2879
2880       cumul_degraded = (cumul_degraded or
2881                         (mstat.is_degraded and mstat.sync_percent is None))
2882       if mstat.sync_percent is not None:
2883         done = False
2884         if mstat.estimated_time is not None:
2885           rem_time = ("%s remaining (estimated)" %
2886                       utils.FormatSeconds(mstat.estimated_time))
2887           max_time = mstat.estimated_time
2888         else:
2889           rem_time = "no time estimate"
2890         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2891                         (disks[i].iv_name, mstat.sync_percent, rem_time))
2892
2893     # if we're done but degraded, let's do a few small retries, to
2894     # make sure we see a stable and not transient situation; therefore
2895     # we force restart of the loop
2896     if (done or oneshot) and cumul_degraded and degr_retries > 0:
2897       logging.info("Degraded disks found, %d retries left", degr_retries)
2898       degr_retries -= 1
2899       time.sleep(1)
2900       continue
2901
2902     if done or oneshot:
2903       break
2904
2905     time.sleep(min(60, max_time))
2906
2907   if done:
2908     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2909   return not cumul_degraded
2910
2911
2912 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2913   """Check that mirrors are not degraded.
2914
2915   The ldisk parameter, if True, will change the test from the
2916   is_degraded attribute (which represents overall non-ok status for
2917   the device(s)) to the ldisk (representing the local storage status).
2918
2919   """
2920   lu.cfg.SetDiskID(dev, node)
2921
2922   result = True
2923
2924   if on_primary or dev.AssembleOnSecondary():
2925     rstats = lu.rpc.call_blockdev_find(node, dev)
2926     msg = rstats.fail_msg
2927     if msg:
2928       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2929       result = False
2930     elif not rstats.payload:
2931       lu.LogWarning("Can't find disk on node %s", node)
2932       result = False
2933     else:
2934       if ldisk:
2935         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2936       else:
2937         result = result and not rstats.payload.is_degraded
2938
2939   if dev.children:
2940     for child in dev.children:
2941       result = result and _CheckDiskConsistency(lu, child, node, on_primary)
2942
2943   return result
2944
2945
2946 class LUDiagnoseOS(NoHooksLU):
2947   """Logical unit for OS diagnose/query.
2948
2949   """
2950   _OP_PARAMS = [
2951     _POutputFields,
2952     ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
2953     ]
2954   REQ_BGL = False
2955   _HID = "hidden"
2956   _BLK = "blacklisted"
2957   _VLD = "valid"
2958   _FIELDS_STATIC = utils.FieldSet()
2959   _FIELDS_DYNAMIC = utils.FieldSet("name", _VLD, "node_status", "variants",
2960                                    "parameters", "api_versions", _HID, _BLK)
2961
2962   def CheckArguments(self):
2963     if self.op.names:
2964       raise errors.OpPrereqError("Selective OS query not supported",
2965                                  errors.ECODE_INVAL)
2966
2967     _CheckOutputFields(static=self._FIELDS_STATIC,
2968                        dynamic=self._FIELDS_DYNAMIC,
2969                        selected=self.op.output_fields)
2970
2971   def ExpandNames(self):
2972     # Lock all nodes, in shared mode
2973     # Temporary removal of locks, should be reverted later
2974     # TODO: reintroduce locks when they are lighter-weight
2975     self.needed_locks = {}
2976     #self.share_locks[locking.LEVEL_NODE] = 1
2977     #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2978
2979   @staticmethod
2980   def _DiagnoseByOS(rlist):
2981     """Remaps a per-node return list into an a per-os per-node dictionary
2982
2983     @param rlist: a map with node names as keys and OS objects as values
2984
2985     @rtype: dict
2986     @return: a dictionary with osnames as keys and as value another
2987         map, with nodes as keys and tuples of (path, status, diagnose,
2988         variants, parameters, api_versions) as values, eg::
2989
2990           {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
2991                                      (/srv/..., False, "invalid api")],
2992                            "node2": [(/srv/..., True, "", [], [])]}
2993           }
2994
2995     """
2996     all_os = {}
2997     # we build here the list of nodes that didn't fail the RPC (at RPC
2998     # level), so that nodes with a non-responding node daemon don't
2999     # make all OSes invalid
3000     good_nodes = [node_name for node_name in rlist
3001                   if not rlist[node_name].fail_msg]
3002     for node_name, nr in rlist.items():
3003       if nr.fail_msg or not nr.payload:
3004         continue
3005       for (name, path, status, diagnose, variants,
3006            params, api_versions) in nr.payload:
3007         if name not in all_os:
3008           # build a list of nodes for this os containing empty lists
3009           # for each node in node_list
3010           all_os[name] = {}
3011           for nname in good_nodes:
3012             all_os[name][nname] = []
3013         # convert params from [name, help] to (name, help)
3014         params = [tuple(v) for v in params]
3015         all_os[name][node_name].append((path, status, diagnose,
3016                                         variants, params, api_versions))
3017     return all_os
3018
3019   def Exec(self, feedback_fn):
3020     """Compute the list of OSes.
3021
3022     """
3023     valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
3024     node_data = self.rpc.call_os_diagnose(valid_nodes)
3025     pol = self._DiagnoseByOS(node_data)
3026     output = []
3027     cluster = self.cfg.GetClusterInfo()
3028
3029     for os_name in utils.NiceSort(pol.keys()):
3030       os_data = pol[os_name]
3031       row = []
3032       valid = True
3033       (variants, params, api_versions) = null_state = (set(), set(), set())
3034       for idx, osl in enumerate(os_data.values()):
3035         valid = bool(valid and osl and osl[0][1])
3036         if not valid:
3037           (variants, params, api_versions) = null_state
3038           break
3039         node_variants, node_params, node_api = osl[0][3:6]
3040         if idx == 0: # first entry
3041           variants = set(node_variants)
3042           params = set(node_params)
3043           api_versions = set(node_api)
3044         else: # keep consistency
3045           variants.intersection_update(node_variants)
3046           params.intersection_update(node_params)
3047           api_versions.intersection_update(node_api)
3048
3049       is_hid = os_name in cluster.hidden_os
3050       is_blk = os_name in cluster.blacklisted_os
3051       if ((self._HID not in self.op.output_fields and is_hid) or
3052           (self._BLK not in self.op.output_fields and is_blk) or
3053           (self._VLD not in self.op.output_fields and not valid)):
3054         continue
3055
3056       for field in self.op.output_fields:
3057         if field == "name":
3058           val = os_name
3059         elif field == self._VLD:
3060           val = valid
3061         elif field == "node_status":
3062           # this is just a copy of the dict
3063           val = {}
3064           for node_name, nos_list in os_data.items():
3065             val[node_name] = nos_list
3066         elif field == "variants":
3067           val = utils.NiceSort(list(variants))
3068         elif field == "parameters":
3069           val = list(params)
3070         elif field == "api_versions":
3071           val = list(api_versions)
3072         elif field == self._HID:
3073           val = is_hid
3074         elif field == self._BLK:
3075           val = is_blk
3076         else:
3077           raise errors.ParameterError(field)
3078         row.append(val)
3079       output.append(row)
3080
3081     return output
3082
3083
3084 class LURemoveNode(LogicalUnit):
3085   """Logical unit for removing a node.
3086
3087   """
3088   HPATH = "node-remove"
3089   HTYPE = constants.HTYPE_NODE
3090   _OP_PARAMS = [
3091     _PNodeName,
3092     ]
3093
3094   def BuildHooksEnv(self):
3095     """Build hooks env.
3096
3097     This doesn't run on the target node in the pre phase as a failed
3098     node would then be impossible to remove.
3099
3100     """
3101     env = {
3102       "OP_TARGET": self.op.node_name,
3103       "NODE_NAME": self.op.node_name,
3104       }
3105     all_nodes = self.cfg.GetNodeList()
3106     try:
3107       all_nodes.remove(self.op.node_name)
3108     except ValueError:
3109       logging.warning("Node %s which is about to be removed not found"
3110                       " in the all nodes list", self.op.node_name)
3111     return env, all_nodes, all_nodes
3112
3113   def CheckPrereq(self):
3114     """Check prerequisites.
3115
3116     This checks:
3117      - the node exists in the configuration
3118      - it does not have primary or secondary instances
3119      - it's not the master
3120
3121     Any errors are signaled by raising errors.OpPrereqError.
3122
3123     """
3124     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3125     node = self.cfg.GetNodeInfo(self.op.node_name)
3126     assert node is not None
3127
3128     instance_list = self.cfg.GetInstanceList()
3129
3130     masternode = self.cfg.GetMasterNode()
3131     if node.name == masternode:
3132       raise errors.OpPrereqError("Node is the master node,"
3133                                  " you need to failover first.",
3134                                  errors.ECODE_INVAL)
3135
3136     for instance_name in instance_list:
3137       instance = self.cfg.GetInstanceInfo(instance_name)
3138       if node.name in instance.all_nodes:
3139         raise errors.OpPrereqError("Instance %s is still running on the node,"
3140                                    " please remove first." % instance_name,
3141                                    errors.ECODE_INVAL)
3142     self.op.node_name = node.name
3143     self.node = node
3144
3145   def Exec(self, feedback_fn):
3146     """Removes the node from the cluster.
3147
3148     """
3149     node = self.node
3150     logging.info("Stopping the node daemon and removing configs from node %s",
3151                  node.name)
3152
3153     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3154
3155     # Promote nodes to master candidate as needed
3156     _AdjustCandidatePool(self, exceptions=[node.name])
3157     self.context.RemoveNode(node.name)
3158
3159     # Run post hooks on the node before it's removed
3160     hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3161     try:
3162       hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3163     except:
3164       # pylint: disable-msg=W0702
3165       self.LogWarning("Errors occurred running hooks on %s" % node.name)
3166
3167     result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3168     msg = result.fail_msg
3169     if msg:
3170       self.LogWarning("Errors encountered on the remote node while leaving"
3171                       " the cluster: %s", msg)
3172
3173     # Remove node from our /etc/hosts
3174     if self.cfg.GetClusterInfo().modify_etc_hosts:
3175       master_node = self.cfg.GetMasterNode()
3176       result = self.rpc.call_etc_hosts_modify(master_node,
3177                                               constants.ETC_HOSTS_REMOVE,
3178                                               node.name, None)
3179       result.Raise("Can't update hosts file with new host data")
3180       _RedistributeAncillaryFiles(self)
3181
3182
3183 class LUQueryNodes(NoHooksLU):
3184   """Logical unit for querying nodes.
3185
3186   """
3187   # pylint: disable-msg=W0142
3188   _OP_PARAMS = [
3189     _POutputFields,
3190     ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
3191     ("use_locking", False, ht.TBool),
3192     ]
3193   REQ_BGL = False
3194
3195   _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
3196                     "master_candidate", "offline", "drained"]
3197
3198   _FIELDS_DYNAMIC = utils.FieldSet(
3199     "dtotal", "dfree",
3200     "mtotal", "mnode", "mfree",
3201     "bootid",
3202     "ctotal", "cnodes", "csockets",
3203     )
3204
3205   _FIELDS_STATIC = utils.FieldSet(*[
3206     "pinst_cnt", "sinst_cnt",
3207     "pinst_list", "sinst_list",
3208     "pip", "sip", "tags",
3209     "master",
3210     "role"] + _SIMPLE_FIELDS
3211     )
3212
3213   def CheckArguments(self):
3214     _CheckOutputFields(static=self._FIELDS_STATIC,
3215                        dynamic=self._FIELDS_DYNAMIC,
3216                        selected=self.op.output_fields)
3217
3218   def ExpandNames(self):
3219     self.needed_locks = {}
3220     self.share_locks[locking.LEVEL_NODE] = 1
3221
3222     if self.op.names:
3223       self.wanted = _GetWantedNodes(self, self.op.names)
3224     else:
3225       self.wanted = locking.ALL_SET
3226
3227     self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3228     self.do_locking = self.do_node_query and self.op.use_locking
3229     if self.do_locking:
3230       # if we don't request only static fields, we need to lock the nodes
3231       self.needed_locks[locking.LEVEL_NODE] = self.wanted
3232
3233   def Exec(self, feedback_fn):
3234     """Computes the list of nodes and their attributes.
3235
3236     """
3237     all_info = self.cfg.GetAllNodesInfo()
3238     if self.do_locking:
3239       nodenames = self.acquired_locks[locking.LEVEL_NODE]
3240     elif self.wanted != locking.ALL_SET:
3241       nodenames = self.wanted
3242       missing = set(nodenames).difference(all_info.keys())
3243       if missing:
3244         raise errors.OpExecError(
3245           "Some nodes were removed before retrieving their data: %s" % missing)
3246     else:
3247       nodenames = all_info.keys()
3248
3249     nodenames = utils.NiceSort(nodenames)
3250     nodelist = [all_info[name] for name in nodenames]
3251
3252     # begin data gathering
3253
3254     if self.do_node_query:
3255       live_data = {}
3256       node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
3257                                           self.cfg.GetHypervisorType())
3258       for name in nodenames:
3259         nodeinfo = node_data[name]
3260         if not nodeinfo.fail_msg and nodeinfo.payload:
3261           nodeinfo = nodeinfo.payload
3262           fn = utils.TryConvert
3263           live_data[name] = {
3264             "mtotal": fn(int, nodeinfo.get('memory_total', None)),
3265             "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
3266             "mfree": fn(int, nodeinfo.get('memory_free', None)),
3267             "dtotal": fn(int, nodeinfo.get('vg_size', None)),
3268             "dfree": fn(int, nodeinfo.get('vg_free', None)),
3269             "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
3270             "bootid": nodeinfo.get('bootid', None),
3271             "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
3272             "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
3273             }
3274         else:
3275           live_data[name] = {}
3276     else:
3277       live_data = dict.fromkeys(nodenames, {})
3278
3279     node_to_primary = dict([(name, set()) for name in nodenames])
3280     node_to_secondary = dict([(name, set()) for name in nodenames])
3281
3282     inst_fields = frozenset(("pinst_cnt", "pinst_list",
3283                              "sinst_cnt", "sinst_list"))
3284     if inst_fields & frozenset(self.op.output_fields):
3285       inst_data = self.cfg.GetAllInstancesInfo()
3286
3287       for inst in inst_data.values():
3288         if inst.primary_node in node_to_primary:
3289           node_to_primary[inst.primary_node].add(inst.name)
3290         for secnode in inst.secondary_nodes:
3291           if secnode in node_to_secondary:
3292             node_to_secondary[secnode].add(inst.name)
3293
3294     master_node = self.cfg.GetMasterNode()
3295
3296     # end data gathering
3297
3298     output = []
3299     for node in nodelist:
3300       node_output = []
3301       for field in self.op.output_fields:
3302         if field in self._SIMPLE_FIELDS:
3303           val = getattr(node, field)
3304         elif field == "pinst_list":
3305           val = list(node_to_primary[node.name])
3306         elif field == "sinst_list":
3307           val = list(node_to_secondary[node.name])
3308         elif field == "pinst_cnt":
3309           val = len(node_to_primary[node.name])
3310         elif field == "sinst_cnt":
3311           val = len(node_to_secondary[node.name])
3312         elif field == "pip":
3313           val = node.primary_ip
3314         elif field == "sip":
3315           val = node.secondary_ip
3316         elif field == "tags":
3317           val = list(node.GetTags())
3318         elif field == "master":
3319           val = node.name == master_node
3320         elif self._FIELDS_DYNAMIC.Matches(field):
3321           val = live_data[node.name].get(field, None)
3322         elif field == "role":
3323           if node.name == master_node:
3324             val = "M"
3325           elif node.master_candidate:
3326             val = "C"
3327           elif node.drained:
3328             val = "D"
3329           elif node.offline:
3330             val = "O"
3331           else:
3332             val = "R"
3333         else:
3334           raise errors.ParameterError(field)
3335         node_output.append(val)
3336       output.append(node_output)
3337
3338     return output
3339
3340
3341 class LUQueryNodeVolumes(NoHooksLU):
3342   """Logical unit for getting volumes on node(s).
3343
3344   """
3345   _OP_PARAMS = [
3346     ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
3347     ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
3348     ]
3349   REQ_BGL = False
3350   _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3351   _FIELDS_STATIC = utils.FieldSet("node")
3352
3353   def CheckArguments(self):
3354     _CheckOutputFields(static=self._FIELDS_STATIC,
3355                        dynamic=self._FIELDS_DYNAMIC,
3356                        selected=self.op.output_fields)
3357
3358   def ExpandNames(self):
3359     self.needed_locks = {}
3360     self.share_locks[locking.LEVEL_NODE] = 1
3361     if not self.op.nodes:
3362       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3363     else:
3364       self.needed_locks[locking.LEVEL_NODE] = \
3365         _GetWantedNodes(self, self.op.nodes)
3366
3367   def Exec(self, feedback_fn):
3368     """Computes the list of nodes and their attributes.
3369
3370     """
3371     nodenames = self.acquired_locks[locking.LEVEL_NODE]
3372     volumes = self.rpc.call_node_volumes(nodenames)
3373
3374     ilist = [self.cfg.GetInstanceInfo(iname) for iname
3375              in self.cfg.GetInstanceList()]
3376
3377     lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3378
3379     output = []
3380     for node in nodenames:
3381       nresult = volumes[node]
3382       if nresult.offline:
3383         continue
3384       msg = nresult.fail_msg
3385       if msg:
3386         self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3387         continue
3388
3389       node_vols = nresult.payload[:]
3390       node_vols.sort(key=lambda vol: vol['dev'])
3391
3392       for vol in node_vols:
3393         node_output = []
3394         for field in self.op.output_fields:
3395           if field == "node":
3396             val = node
3397           elif field == "phys":
3398             val = vol['dev']
3399           elif field == "vg":
3400             val = vol['vg']
3401           elif field == "name":
3402             val = vol['name']
3403           elif field == "size":
3404             val = int(float(vol['size']))
3405           elif field == "instance":
3406             for inst in ilist:
3407               if node not in lv_by_node[inst]:
3408                 continue
3409               if vol['name'] in lv_by_node[inst][node]:
3410                 val = inst.name
3411                 break
3412             else:
3413               val = '-'
3414           else:
3415             raise errors.ParameterError(field)
3416           node_output.append(str(val))
3417
3418         output.append(node_output)
3419
3420     return output
3421
3422
3423 class LUQueryNodeStorage(NoHooksLU):
3424   """Logical unit for getting information on storage units on node(s).
3425
3426   """
3427   _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3428   _OP_PARAMS = [
3429     ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
3430     ("storage_type", ht.NoDefault, _CheckStorageType),
3431     ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
3432     ("name", None, ht.TMaybeString),
3433     ]
3434   REQ_BGL = False
3435
3436   def CheckArguments(self):
3437     _CheckOutputFields(static=self._FIELDS_STATIC,
3438                        dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3439                        selected=self.op.output_fields)
3440
3441   def ExpandNames(self):
3442     self.needed_locks = {}
3443     self.share_locks[locking.LEVEL_NODE] = 1
3444
3445     if self.op.nodes:
3446       self.needed_locks[locking.LEVEL_NODE] = \
3447         _GetWantedNodes(self, self.op.nodes)
3448     else:
3449       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3450
3451   def Exec(self, feedback_fn):
3452     """Computes the list of nodes and their attributes.
3453
3454     """
3455     self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3456
3457     # Always get name to sort by
3458     if constants.SF_NAME in self.op.output_fields:
3459       fields = self.op.output_fields[:]
3460     else:
3461       fields = [constants.SF_NAME] + self.op.output_fields
3462
3463     # Never ask for node or type as it's only known to the LU
3464     for extra in [constants.SF_NODE, constants.SF_TYPE]:
3465       while extra in fields:
3466         fields.remove(extra)
3467
3468     field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3469     name_idx = field_idx[constants.SF_NAME]
3470
3471     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3472     data = self.rpc.call_storage_list(self.nodes,
3473                                       self.op.storage_type, st_args,
3474                                       self.op.name, fields)
3475
3476     result = []
3477
3478     for node in utils.NiceSort(self.nodes):
3479       nresult = data[node]
3480       if nresult.offline:
3481         continue
3482
3483       msg = nresult.fail_msg
3484       if msg:
3485         self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3486         continue
3487
3488       rows = dict([(row[name_idx], row) for row in nresult.payload])
3489
3490       for name in utils.NiceSort(rows.keys()):
3491         row = rows[name]
3492
3493         out = []
3494
3495         for field in self.op.output_fields:
3496           if field == constants.SF_NODE:
3497             val = node
3498           elif field == constants.SF_TYPE:
3499             val = self.op.storage_type
3500           elif field in field_idx:
3501             val = row[field_idx[field]]
3502           else:
3503             raise errors.ParameterError(field)
3504
3505           out.append(val)
3506
3507         result.append(out)
3508
3509     return result
3510
3511
3512 class LUModifyNodeStorage(NoHooksLU):
3513   """Logical unit for modifying a storage volume on a node.
3514
3515   """
3516   _OP_PARAMS = [
3517     _PNodeName,
3518     ("storage_type", ht.NoDefault, _CheckStorageType),
3519     ("name", ht.NoDefault, ht.TNonEmptyString),
3520     ("changes", ht.NoDefault, ht.TDict),
3521     ]
3522   REQ_BGL = False
3523
3524   def CheckArguments(self):
3525     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3526
3527     storage_type = self.op.storage_type
3528
3529     try:
3530       modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3531     except KeyError:
3532       raise errors.OpPrereqError("Storage units of type '%s' can not be"
3533                                  " modified" % storage_type,
3534                                  errors.ECODE_INVAL)
3535
3536     diff = set(self.op.changes.keys()) - modifiable
3537     if diff:
3538       raise errors.OpPrereqError("The following fields can not be modified for"
3539                                  " storage units of type '%s': %r" %
3540                                  (storage_type, list(diff)),
3541                                  errors.ECODE_INVAL)
3542
3543   def ExpandNames(self):
3544     self.needed_locks = {
3545       locking.LEVEL_NODE: self.op.node_name,
3546       }
3547
3548   def Exec(self, feedback_fn):
3549     """Computes the list of nodes and their attributes.
3550
3551     """
3552     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3553     result = self.rpc.call_storage_modify(self.op.node_name,
3554                                           self.op.storage_type, st_args,
3555                                           self.op.name, self.op.changes)
3556     result.Raise("Failed to modify storage unit '%s' on %s" %
3557                  (self.op.name, self.op.node_name))
3558
3559
3560 class LUAddNode(LogicalUnit):
3561   """Logical unit for adding node to the cluster.
3562
3563   """
3564   HPATH = "node-add"
3565   HTYPE = constants.HTYPE_NODE
3566   _OP_PARAMS = [
3567     _PNodeName,
3568     ("primary_ip", None, ht.NoType),
3569     ("secondary_ip", None, ht.TMaybeString),
3570     ("readd", False, ht.TBool),
3571     ("nodegroup", None, ht.TMaybeString)
3572     ]
3573
3574   def CheckArguments(self):
3575     self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
3576     # validate/normalize the node name
3577     self.hostname = netutils.GetHostname(name=self.op.node_name,
3578                                          family=self.primary_ip_family)
3579     self.op.node_name = self.hostname.name
3580     if self.op.readd and self.op.nodegroup:
3581       raise errors.OpPrereqError("Cannot pass a nodegroup when a node is"
3582                                  " being readded", errors.ECODE_INVAL)
3583
3584   def BuildHooksEnv(self):
3585     """Build hooks env.
3586
3587     This will run on all nodes before, and on all nodes + the new node after.
3588
3589     """
3590     env = {
3591       "OP_TARGET": self.op.node_name,
3592       "NODE_NAME": self.op.node_name,
3593       "NODE_PIP": self.op.primary_ip,
3594       "NODE_SIP": self.op.secondary_ip,
3595       }
3596     nodes_0 = self.cfg.GetNodeList()
3597     nodes_1 = nodes_0 + [self.op.node_name, ]
3598     return env, nodes_0, nodes_1
3599
3600   def CheckPrereq(self):
3601     """Check prerequisites.
3602
3603     This checks:
3604      - the new node is not already in the config
3605      - it is resolvable
3606      - its parameters (single/dual homed) matches the cluster
3607
3608     Any errors are signaled by raising errors.OpPrereqError.
3609
3610     """
3611     cfg = self.cfg
3612     hostname = self.hostname
3613     node = hostname.name
3614     primary_ip = self.op.primary_ip = hostname.ip
3615     if self.op.secondary_ip is None:
3616       if self.primary_ip_family == netutils.IP6Address.family:
3617         raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
3618                                    " IPv4 address must be given as secondary",
3619                                    errors.ECODE_INVAL)
3620       self.op.secondary_ip = primary_ip
3621
3622     secondary_ip = self.op.secondary_ip
3623     if not netutils.IP4Address.IsValid(secondary_ip):
3624       raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
3625                                  " address" % secondary_ip, errors.ECODE_INVAL)
3626
3627     node_list = cfg.GetNodeList()
3628     if not self.op.readd and node in node_list:
3629       raise errors.OpPrereqError("Node %s is already in the configuration" %
3630                                  node, errors.ECODE_EXISTS)
3631     elif self.op.readd and node not in node_list:
3632       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3633                                  errors.ECODE_NOENT)
3634
3635     self.changed_primary_ip = False
3636
3637     for existing_node_name in node_list:
3638       existing_node = cfg.GetNodeInfo(existing_node_name)
3639
3640       if self.op.readd and node == existing_node_name:
3641         if existing_node.secondary_ip != secondary_ip:
3642           raise errors.OpPrereqError("Readded node doesn't have the same IP"
3643                                      " address configuration as before",
3644                                      errors.ECODE_INVAL)
3645         if existing_node.primary_ip != primary_ip:
3646           self.changed_primary_ip = True
3647
3648         continue
3649
3650       if (existing_node.primary_ip == primary_ip or
3651           existing_node.secondary_ip == primary_ip or
3652           existing_node.primary_ip == secondary_ip or
3653           existing_node.secondary_ip == secondary_ip):
3654         raise errors.OpPrereqError("New node ip address(es) conflict with"
3655                                    " existing node %s" % existing_node.name,
3656                                    errors.ECODE_NOTUNIQUE)
3657
3658     # check that the type of the node (single versus dual homed) is the
3659     # same as for the master
3660     myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3661     master_singlehomed = myself.secondary_ip == myself.primary_ip
3662     newbie_singlehomed = secondary_ip == primary_ip
3663     if master_singlehomed != newbie_singlehomed:
3664       if master_singlehomed:
3665         raise errors.OpPrereqError("The master has no private ip but the"
3666                                    " new node has one",
3667                                    errors.ECODE_INVAL)
3668       else:
3669         raise errors.OpPrereqError("The master has a private ip but the"
3670                                    " new node doesn't have one",
3671                                    errors.ECODE_INVAL)
3672
3673     # checks reachability
3674     if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3675       raise errors.OpPrereqError("Node not reachable by ping",
3676                                  errors.ECODE_ENVIRON)
3677
3678     if not newbie_singlehomed:
3679       # check reachability from my secondary ip to newbie's secondary ip
3680       if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3681                            source=myself.secondary_ip):
3682         raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3683                                    " based ping to noded port",
3684                                    errors.ECODE_ENVIRON)
3685
3686     if self.op.readd:
3687       exceptions = [node]
3688     else:
3689       exceptions = []
3690
3691     self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3692
3693     if self.op.readd:
3694       self.new_node = self.cfg.GetNodeInfo(node)
3695       assert self.new_node is not None, "Can't retrieve locked node %s" % node
3696     else:
3697       nodegroup = cfg.LookupNodeGroup(self.op.nodegroup)
3698       self.new_node = objects.Node(name=node,
3699                                    primary_ip=primary_ip,
3700                                    secondary_ip=secondary_ip,
3701                                    master_candidate=self.master_candidate,
3702                                    offline=False, drained=False,
3703                                    nodegroup=nodegroup)
3704
3705   def Exec(self, feedback_fn):
3706     """Adds the new node to the cluster.
3707
3708     """
3709     new_node = self.new_node
3710     node = new_node.name
3711
3712     # for re-adds, reset the offline/drained/master-candidate flags;
3713     # we need to reset here, otherwise offline would prevent RPC calls
3714     # later in the procedure; this also means that if the re-add
3715     # fails, we are left with a non-offlined, broken node
3716     if self.op.readd:
3717       new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3718       self.LogInfo("Readding a node, the offline/drained flags were reset")
3719       # if we demote the node, we do cleanup later in the procedure
3720       new_node.master_candidate = self.master_candidate
3721       if self.changed_primary_ip:
3722         new_node.primary_ip = self.op.primary_ip
3723
3724     # notify the user about any possible mc promotion
3725     if new_node.master_candidate:
3726       self.LogInfo("Node will be a master candidate")
3727
3728     # check connectivity
3729     result = self.rpc.call_version([node])[node]
3730     result.Raise("Can't get version information from node %s" % node)
3731     if constants.PROTOCOL_VERSION == result.payload:
3732       logging.info("Communication to node %s fine, sw version %s match",
3733                    node, result.payload)
3734     else:
3735       raise errors.OpExecError("Version mismatch master version %s,"
3736                                " node version %s" %
3737                                (constants.PROTOCOL_VERSION, result.payload))
3738
3739     # Add node to our /etc/hosts, and add key to known_hosts
3740     if self.cfg.GetClusterInfo().modify_etc_hosts:
3741       master_node = self.cfg.GetMasterNode()
3742       result = self.rpc.call_etc_hosts_modify(master_node,
3743                                               constants.ETC_HOSTS_ADD,
3744                                               self.hostname.name,
3745                                               self.hostname.ip)
3746       result.Raise("Can't update hosts file with new host data")
3747
3748     if new_node.secondary_ip != new_node.primary_ip:
3749       result = self.rpc.call_node_has_ip_address(new_node.name,
3750                                                  new_node.secondary_ip)
3751       result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3752                    prereq=True, ecode=errors.ECODE_ENVIRON)
3753       if not result.payload:
3754         raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3755                                  " you gave (%s). Please fix and re-run this"
3756                                  " command." % new_node.secondary_ip)
3757
3758     node_verify_list = [self.cfg.GetMasterNode()]
3759     node_verify_param = {
3760       constants.NV_NODELIST: [node],
3761       # TODO: do a node-net-test as well?
3762     }
3763
3764     result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3765                                        self.cfg.GetClusterName())
3766     for verifier in node_verify_list:
3767       result[verifier].Raise("Cannot communicate with node %s" % verifier)
3768       nl_payload = result[verifier].payload[constants.NV_NODELIST]
3769       if nl_payload:
3770         for failed in nl_payload:
3771           feedback_fn("ssh/hostname verification failed"
3772                       " (checking from %s): %s" %
3773                       (verifier, nl_payload[failed]))
3774         raise errors.OpExecError("ssh/hostname verification failed.")
3775
3776     if self.op.readd:
3777       _RedistributeAncillaryFiles(self)
3778       self.context.ReaddNode(new_node)
3779       # make sure we redistribute the config
3780       self.cfg.Update(new_node, feedback_fn)
3781       # and make sure the new node will not have old files around
3782       if not new_node.master_candidate:
3783         result = self.rpc.call_node_demote_from_mc(new_node.name)
3784         msg = result.fail_msg
3785         if msg:
3786           self.LogWarning("Node failed to demote itself from master"
3787                           " candidate status: %s" % msg)
3788     else:
3789       _RedistributeAncillaryFiles(self, additional_nodes=[node])
3790       self.context.AddNode(new_node, self.proc.GetECId())
3791
3792
3793 class LUSetNodeParams(LogicalUnit):
3794   """Modifies the parameters of a node.
3795
3796   """
3797   HPATH = "node-modify"
3798   HTYPE = constants.HTYPE_NODE
3799   _OP_PARAMS = [
3800     _PNodeName,
3801     ("master_candidate", None, ht.TMaybeBool),
3802     ("offline", None, ht.TMaybeBool),
3803     ("drained", None, ht.TMaybeBool),
3804     ("auto_promote", False, ht.TBool),
3805     _PForce,
3806     ]
3807   REQ_BGL = False
3808
3809   def CheckArguments(self):
3810     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3811     all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3812     if all_mods.count(None) == 3:
3813       raise errors.OpPrereqError("Please pass at least one modification",
3814                                  errors.ECODE_INVAL)
3815     if all_mods.count(True) > 1:
3816       raise errors.OpPrereqError("Can't set the node into more than one"
3817                                  " state at the same time",
3818                                  errors.ECODE_INVAL)
3819
3820     # Boolean value that tells us whether we're offlining or draining the node
3821     self.offline_or_drain = (self.op.offline == True or
3822                              self.op.drained == True)
3823     self.deoffline_or_drain = (self.op.offline == False or
3824                                self.op.drained == False)
3825     self.might_demote = (self.op.master_candidate == False or
3826                          self.offline_or_drain)
3827
3828     self.lock_all = self.op.auto_promote and self.might_demote
3829
3830
3831   def ExpandNames(self):
3832     if self.lock_all:
3833       self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3834     else:
3835       self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3836
3837   def BuildHooksEnv(self):
3838     """Build hooks env.
3839
3840     This runs on the master node.
3841
3842     """
3843     env = {
3844       "OP_TARGET": self.op.node_name,
3845       "MASTER_CANDIDATE": str(self.op.master_candidate),
3846       "OFFLINE": str(self.op.offline),
3847       "DRAINED": str(self.op.drained),
3848       }
3849     nl = [self.cfg.GetMasterNode(),
3850           self.op.node_name]
3851     return env, nl, nl
3852
3853   def CheckPrereq(self):
3854     """Check prerequisites.
3855
3856     This only checks the instance list against the existing names.
3857
3858     """
3859     node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3860
3861     if (self.op.master_candidate is not None or
3862         self.op.drained is not None or
3863         self.op.offline is not None):
3864       # we can't change the master's node flags
3865       if self.op.node_name == self.cfg.GetMasterNode():
3866         raise errors.OpPrereqError("The master role can be changed"
3867                                    " only via master-failover",
3868                                    errors.ECODE_INVAL)
3869
3870
3871     if node.master_candidate and self.might_demote and not self.lock_all:
3872       assert not self.op.auto_promote, "auto-promote set but lock_all not"
3873       # check if after removing the current node, we're missing master
3874       # candidates
3875       (mc_remaining, mc_should, _) = \
3876           self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3877       if mc_remaining < mc_should:
3878         raise errors.OpPrereqError("Not enough master candidates, please"
3879                                    " pass auto_promote to allow promotion",
3880                                    errors.ECODE_INVAL)
3881
3882     if (self.op.master_candidate == True and
3883         ((node.offline and not self.op.offline == False) or
3884          (node.drained and not self.op.drained == False))):
3885       raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3886                                  " to master_candidate" % node.name,
3887                                  errors.ECODE_INVAL)
3888
3889     # If we're being deofflined/drained, we'll MC ourself if needed
3890     if (self.deoffline_or_drain and not self.offline_or_drain and not
3891         self.op.master_candidate == True and not node.master_candidate):
3892       self.op.master_candidate = _DecideSelfPromotion(self)
3893       if self.op.master_candidate:
3894         self.LogInfo("Autopromoting node to master candidate")
3895
3896     return
3897
3898   def Exec(self, feedback_fn):
3899     """Modifies a node.
3900
3901     """
3902     node = self.node
3903
3904     result = []
3905     changed_mc = False
3906
3907     if self.op.offline is not None:
3908       node.offline = self.op.offline
3909       result.append(("offline", str(self.op.offline)))
3910       if self.op.offline == True:
3911         if node.master_candidate:
3912           node.master_candidate = False
3913           changed_mc = True
3914           result.append(("master_candidate", "auto-demotion due to offline"))
3915         if node.drained:
3916           node.drained = False
3917           result.append(("drained", "clear drained status due to offline"))
3918
3919     if self.op.master_candidate is not None:
3920       node.master_candidate = self.op.master_candidate
3921       changed_mc = True
3922       result.append(("master_candidate", str(self.op.master_candidate)))
3923       if self.op.master_candidate == False:
3924         rrc = self.rpc.call_node_demote_from_mc(node.name)
3925         msg = rrc.fail_msg
3926         if msg:
3927           self.LogWarning("Node failed to demote itself: %s" % msg)
3928
3929     if self.op.drained is not None:
3930       node.drained = self.op.drained
3931       result.append(("drained", str(self.op.drained)))
3932       if self.op.drained == True:
3933         if node.master_candidate:
3934           node.master_candidate = False
3935           changed_mc = True
3936           result.append(("master_candidate", "auto-demotion due to drain"))
3937           rrc = self.rpc.call_node_demote_from_mc(node.name)
3938           msg = rrc.fail_msg
3939           if msg:
3940             self.LogWarning("Node failed to demote itself: %s" % msg)
3941         if node.offline:
3942           node.offline = False
3943           result.append(("offline", "clear offline status due to drain"))
3944
3945     # we locked all nodes, we adjust the CP before updating this node
3946     if self.lock_all:
3947       _AdjustCandidatePool(self, [node.name])
3948
3949     # this will trigger configuration file update, if needed
3950     self.cfg.Update(node, feedback_fn)
3951
3952     # this will trigger job queue propagation or cleanup
3953     if changed_mc:
3954       self.context.ReaddNode(node)
3955
3956     return result
3957
3958
3959 class LUPowercycleNode(NoHooksLU):
3960   """Powercycles a node.
3961
3962   """
3963   _OP_PARAMS = [
3964     _PNodeName,
3965     _PForce,
3966     ]
3967   REQ_BGL = False
3968
3969   def CheckArguments(self):
3970     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3971     if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
3972       raise errors.OpPrereqError("The node is the master and the force"
3973                                  " parameter was not set",
3974                                  errors.ECODE_INVAL)
3975
3976   def ExpandNames(self):
3977     """Locking for PowercycleNode.
3978
3979     This is a last-resort option and shouldn't block on other
3980     jobs. Therefore, we grab no locks.
3981
3982     """
3983     self.needed_locks = {}
3984
3985   def Exec(self, feedback_fn):
3986     """Reboots a node.
3987
3988     """
3989     result = self.rpc.call_node_powercycle(self.op.node_name,
3990                                            self.cfg.GetHypervisorType())
3991     result.Raise("Failed to schedule the reboot")
3992     return result.payload
3993
3994
3995 class LUQueryClusterInfo(NoHooksLU):
3996   """Query cluster configuration.
3997
3998   """
3999   REQ_BGL = False
4000
4001   def ExpandNames(self):
4002     self.needed_locks = {}
4003
4004   def Exec(self, feedback_fn):
4005     """Return cluster config.
4006
4007     """
4008     cluster = self.cfg.GetClusterInfo()
4009     os_hvp = {}
4010
4011     # Filter just for enabled hypervisors
4012     for os_name, hv_dict in cluster.os_hvp.items():
4013       os_hvp[os_name] = {}
4014       for hv_name, hv_params in hv_dict.items():
4015         if hv_name in cluster.enabled_hypervisors:
4016           os_hvp[os_name][hv_name] = hv_params
4017
4018     # Convert ip_family to ip_version
4019     primary_ip_version = constants.IP4_VERSION
4020     if cluster.primary_ip_family == netutils.IP6Address.family:
4021       primary_ip_version = constants.IP6_VERSION
4022
4023     result = {
4024       "software_version": constants.RELEASE_VERSION,
4025       "protocol_version": constants.PROTOCOL_VERSION,
4026       "config_version": constants.CONFIG_VERSION,
4027       "os_api_version": max(constants.OS_API_VERSIONS),
4028       "export_version": constants.EXPORT_VERSION,
4029       "architecture": (platform.architecture()[0], platform.machine()),
4030       "name": cluster.cluster_name,
4031       "master": cluster.master_node,
4032       "default_hypervisor": cluster.enabled_hypervisors[0],
4033       "enabled_hypervisors": cluster.enabled_hypervisors,
4034       "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4035                         for hypervisor_name in cluster.enabled_hypervisors]),
4036       "os_hvp": os_hvp,
4037       "beparams": cluster.beparams,
4038       "osparams": cluster.osparams,
4039       "nicparams": cluster.nicparams,
4040       "candidate_pool_size": cluster.candidate_pool_size,
4041       "master_netdev": cluster.master_netdev,
4042       "volume_group_name": cluster.volume_group_name,
4043       "drbd_usermode_helper": cluster.drbd_usermode_helper,
4044       "file_storage_dir": cluster.file_storage_dir,
4045       "maintain_node_health": cluster.maintain_node_health,
4046       "ctime": cluster.ctime,
4047       "mtime": cluster.mtime,
4048       "uuid": cluster.uuid,
4049       "tags": list(cluster.GetTags()),
4050       "uid_pool": cluster.uid_pool,
4051       "default_iallocator": cluster.default_iallocator,
4052       "reserved_lvs": cluster.reserved_lvs,
4053       "primary_ip_version": primary_ip_version,
4054       "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
4055       }
4056
4057     return result
4058
4059
4060 class LUQueryConfigValues(NoHooksLU):
4061   """Return configuration values.
4062
4063   """
4064   _OP_PARAMS = [_POutputFields]
4065   REQ_BGL = False
4066   _FIELDS_DYNAMIC = utils.FieldSet()
4067   _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4068                                   "watcher_pause", "volume_group_name")
4069
4070   def CheckArguments(self):
4071     _CheckOutputFields(static=self._FIELDS_STATIC,
4072                        dynamic=self._FIELDS_DYNAMIC,
4073                        selected=self.op.output_fields)
4074
4075   def ExpandNames(self):
4076     self.needed_locks = {}
4077
4078   def Exec(self, feedback_fn):
4079     """Dump a representation of the cluster config to the standard output.
4080
4081     """
4082     values = []
4083     for field in self.op.output_fields:
4084       if field == "cluster_name":
4085         entry = self.cfg.GetClusterName()
4086       elif field == "master_node":
4087         entry = self.cfg.GetMasterNode()
4088       elif field == "drain_flag":
4089         entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4090       elif field == "watcher_pause":
4091         entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4092       elif field == "volume_group_name":
4093         entry = self.cfg.GetVGName()
4094       else:
4095         raise errors.ParameterError(field)
4096       values.append(entry)
4097     return values
4098
4099
4100 class LUActivateInstanceDisks(NoHooksLU):
4101   """Bring up an instance's disks.
4102
4103   """
4104   _OP_PARAMS = [
4105     _PInstanceName,
4106     ("ignore_size", False, ht.TBool),
4107     ]
4108   REQ_BGL = False
4109
4110   def ExpandNames(self):
4111     self._ExpandAndLockInstance()
4112     self.needed_locks[locking.LEVEL_NODE] = []
4113     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4114
4115   def DeclareLocks(self, level):
4116     if level == locking.LEVEL_NODE:
4117       self._LockInstancesNodes()
4118
4119   def CheckPrereq(self):
4120     """Check prerequisites.
4121
4122     This checks that the instance is in the cluster.
4123
4124     """
4125     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4126     assert self.instance is not None, \
4127       "Cannot retrieve locked instance %s" % self.op.instance_name
4128     _CheckNodeOnline(self, self.instance.primary_node)
4129
4130   def Exec(self, feedback_fn):
4131     """Activate the disks.
4132
4133     """
4134     disks_ok, disks_info = \
4135               _AssembleInstanceDisks(self, self.instance,
4136                                      ignore_size=self.op.ignore_size)
4137     if not disks_ok:
4138       raise errors.OpExecError("Cannot activate block devices")
4139
4140     return disks_info
4141
4142
4143 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4144                            ignore_size=False):
4145   """Prepare the block devices for an instance.
4146
4147   This sets up the block devices on all nodes.
4148
4149   @type lu: L{LogicalUnit}
4150   @param lu: the logical unit on whose behalf we execute
4151   @type instance: L{objects.Instance}
4152   @param instance: the instance for whose disks we assemble
4153   @type disks: list of L{objects.Disk} or None
4154   @param disks: which disks to assemble (or all, if None)
4155   @type ignore_secondaries: boolean
4156   @param ignore_secondaries: if true, errors on secondary nodes
4157       won't result in an error return from the function
4158   @type ignore_size: boolean
4159   @param ignore_size: if true, the current known size of the disk
4160       will not be used during the disk activation, useful for cases
4161       when the size is wrong
4162   @return: False if the operation failed, otherwise a list of
4163       (host, instance_visible_name, node_visible_name)
4164       with the mapping from node devices to instance devices
4165
4166   """
4167   device_info = []
4168   disks_ok = True
4169   iname = instance.name
4170   disks = _ExpandCheckDisks(instance, disks)
4171
4172   # With the two passes mechanism we try to reduce the window of
4173   # opportunity for the race condition of switching DRBD to primary
4174   # before handshaking occured, but we do not eliminate it
4175
4176   # The proper fix would be to wait (with some limits) until the
4177   # connection has been made and drbd transitions from WFConnection
4178   # into any other network-connected state (Connected, SyncTarget,
4179   # SyncSource, etc.)
4180
4181   # 1st pass, assemble on all nodes in secondary mode
4182   for inst_disk in disks:
4183     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4184       if ignore_size:
4185         node_disk = node_disk.Copy()
4186         node_disk.UnsetSize()
4187       lu.cfg.SetDiskID(node_disk, node)
4188       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4189       msg = result.fail_msg
4190       if msg:
4191         lu.proc.LogWarning("Could not prepare block device %s on node %s"
4192                            " (is_primary=False, pass=1): %s",
4193                            inst_disk.iv_name, node, msg)
4194         if not ignore_secondaries:
4195           disks_ok = False
4196
4197   # FIXME: race condition on drbd migration to primary
4198
4199   # 2nd pass, do only the primary node
4200   for inst_disk in disks:
4201     dev_path = None
4202
4203     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4204       if node != instance.primary_node:
4205         continue
4206       if ignore_size:
4207         node_disk = node_disk.Copy()
4208         node_disk.UnsetSize()
4209       lu.cfg.SetDiskID(node_disk, node)
4210       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4211       msg = result.fail_msg
4212       if msg:
4213         lu.proc.LogWarning("Could not prepare block device %s on node %s"
4214                            " (is_primary=True, pass=2): %s",
4215                            inst_disk.iv_name, node, msg)
4216         disks_ok = False
4217       else:
4218         dev_path = result.payload
4219
4220     device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4221
4222   # leave the disks configured for the primary node
4223   # this is a workaround that would be fixed better by
4224   # improving the logical/physical id handling
4225   for disk in disks:
4226     lu.cfg.SetDiskID(disk, instance.primary_node)
4227
4228   return disks_ok, device_info
4229
4230
4231 def _StartInstanceDisks(lu, instance, force):
4232   """Start the disks of an instance.
4233
4234   """
4235   disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4236                                            ignore_secondaries=force)
4237   if not disks_ok:
4238     _ShutdownInstanceDisks(lu, instance)
4239     if force is not None and not force:
4240       lu.proc.LogWarning("", hint="If the message above refers to a"
4241                          " secondary node,"
4242                          " you can retry the operation using '--force'.")
4243     raise errors.OpExecError("Disk consistency error")
4244
4245
4246 class LUDeactivateInstanceDisks(NoHooksLU):
4247   """Shutdown an instance's disks.
4248
4249   """
4250   _OP_PARAMS = [
4251     _PInstanceName,
4252     ]
4253   REQ_BGL = False
4254
4255   def ExpandNames(self):
4256     self._ExpandAndLockInstance()
4257     self.needed_locks[locking.LEVEL_NODE] = []
4258     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4259
4260   def DeclareLocks(self, level):
4261     if level == locking.LEVEL_NODE:
4262       self._LockInstancesNodes()
4263
4264   def CheckPrereq(self):
4265     """Check prerequisites.
4266
4267     This checks that the instance is in the cluster.
4268
4269     """
4270     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4271     assert self.instance is not None, \
4272       "Cannot retrieve locked instance %s" % self.op.instance_name
4273
4274   def Exec(self, feedback_fn):
4275     """Deactivate the disks
4276
4277     """
4278     instance = self.instance
4279     _SafeShutdownInstanceDisks(self, instance)
4280
4281
4282 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4283   """Shutdown block devices of an instance.
4284
4285   This function checks if an instance is running, before calling
4286   _ShutdownInstanceDisks.
4287
4288   """
4289   _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4290   _ShutdownInstanceDisks(lu, instance, disks=disks)
4291
4292
4293 def _ExpandCheckDisks(instance, disks):
4294   """Return the instance disks selected by the disks list
4295
4296   @type disks: list of L{objects.Disk} or None
4297   @param disks: selected disks
4298   @rtype: list of L{objects.Disk}
4299   @return: selected instance disks to act on
4300
4301   """
4302   if disks is None:
4303     return instance.disks
4304   else:
4305     if not set(disks).issubset(instance.disks):
4306       raise errors.ProgrammerError("Can only act on disks belonging to the"
4307                                    " target instance")
4308     return disks
4309
4310
4311 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4312   """Shutdown block devices of an instance.
4313
4314   This does the shutdown on all nodes of the instance.
4315
4316   If the ignore_primary is false, errors on the primary node are
4317   ignored.
4318
4319   """
4320   all_result = True
4321   disks = _ExpandCheckDisks(instance, disks)
4322
4323   for disk in disks:
4324     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4325       lu.cfg.SetDiskID(top_disk, node)
4326       result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4327       msg = result.fail_msg
4328       if msg:
4329         lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4330                       disk.iv_name, node, msg)
4331         if not ignore_primary or node != instance.primary_node:
4332           all_result = False
4333   return all_result
4334
4335
4336 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4337   """Checks if a node has enough free memory.
4338
4339   This function check if a given node has the needed amount of free
4340   memory. In case the node has less memory or we cannot get the
4341   information from the node, this function raise an OpPrereqError
4342   exception.
4343
4344   @type lu: C{LogicalUnit}
4345   @param lu: a logical unit from which we get configuration data
4346   @type node: C{str}
4347   @param node: the node to check
4348   @type reason: C{str}
4349   @param reason: string to use in the error message
4350   @type requested: C{int}
4351   @param requested: the amount of memory in MiB to check for
4352   @type hypervisor_name: C{str}
4353   @param hypervisor_name: the hypervisor to ask for memory stats
4354   @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4355       we cannot check the node
4356
4357   """
4358   nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4359   nodeinfo[node].Raise("Can't get data from node %s" % node,
4360                        prereq=True, ecode=errors.ECODE_ENVIRON)
4361   free_mem = nodeinfo[node].payload.get('memory_free', None)
4362   if not isinstance(free_mem, int):
4363     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4364                                " was '%s'" % (node, free_mem),
4365                                errors.ECODE_ENVIRON)
4366   if requested > free_mem:
4367     raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4368                                " needed %s MiB, available %s MiB" %
4369                                (node, reason, requested, free_mem),
4370                                errors.ECODE_NORES)
4371
4372
4373 def _CheckNodesFreeDisk(lu, nodenames, requested):
4374   """Checks if nodes have enough free disk space in the default VG.
4375
4376   This function check if all given nodes have the needed amount of
4377   free disk. In case any node has less disk or we cannot get the
4378   information from the node, this function raise an OpPrereqError
4379   exception.
4380
4381   @type lu: C{LogicalUnit}
4382   @param lu: a logical unit from which we get configuration data
4383   @type nodenames: C{list}
4384   @param nodenames: the list of node names to check
4385   @type requested: C{int}
4386   @param requested: the amount of disk in MiB to check for
4387   @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4388       we cannot check the node
4389
4390   """
4391   nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4392                                    lu.cfg.GetHypervisorType())
4393   for node in nodenames:
4394     info = nodeinfo[node]
4395     info.Raise("Cannot get current information from node %s" % node,
4396                prereq=True, ecode=errors.ECODE_ENVIRON)
4397     vg_free = info.payload.get("vg_free", None)
4398     if not isinstance(vg_free, int):
4399       raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4400                                  " result was '%s'" % (node, vg_free),
4401                                  errors.ECODE_ENVIRON)
4402     if requested > vg_free:
4403       raise errors.OpPrereqError("Not enough disk space on target node %s:"
4404                                  " required %d MiB, available %d MiB" %
4405                                  (node, requested, vg_free),
4406                                  errors.ECODE_NORES)
4407
4408
4409 class LUStartupInstance(LogicalUnit):
4410   """Starts an instance.
4411
4412   """
4413   HPATH = "instance-start"
4414   HTYPE = constants.HTYPE_INSTANCE
4415   _OP_PARAMS = [
4416     _PInstanceName,
4417     _PForce,
4418     _PIgnoreOfflineNodes,
4419     ("hvparams", ht.EmptyDict, ht.TDict),
4420     ("beparams", ht.EmptyDict, ht.TDict),
4421     ]
4422   REQ_BGL = False
4423
4424   def CheckArguments(self):
4425     # extra beparams
4426     if self.op.beparams:
4427       # fill the beparams dict
4428       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4429
4430   def ExpandNames(self):
4431     self._ExpandAndLockInstance()
4432
4433   def BuildHooksEnv(self):
4434     """Build hooks env.
4435
4436     This runs on master, primary and secondary nodes of the instance.
4437
4438     """
4439     env = {
4440       "FORCE": self.op.force,
4441       }
4442     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4443     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4444     return env, nl, nl
4445
4446   def CheckPrereq(self):
4447     """Check prerequisites.
4448
4449     This checks that the instance is in the cluster.
4450
4451     """
4452     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4453     assert self.instance is not None, \
4454       "Cannot retrieve locked instance %s" % self.op.instance_name
4455
4456     # extra hvparams
4457     if self.op.hvparams:
4458       # check hypervisor parameter syntax (locally)
4459       cluster = self.cfg.GetClusterInfo()
4460       utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4461       filled_hvp = cluster.FillHV(instance)
4462       filled_hvp.update(self.op.hvparams)
4463       hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4464       hv_type.CheckParameterSyntax(filled_hvp)
4465       _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4466
4467     self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
4468
4469     if self.primary_offline and self.op.ignore_offline_nodes:
4470       self.proc.LogWarning("Ignoring offline primary node")
4471
4472       if self.op.hvparams or self.op.beparams:
4473         self.proc.LogWarning("Overridden parameters are ignored")
4474     else:
4475       _CheckNodeOnline(self, instance.primary_node)
4476
4477       bep = self.cfg.GetClusterInfo().FillBE(instance)
4478
4479       # check bridges existence
4480       _CheckInstanceBridgesExist(self, instance)
4481
4482       remote_info = self.rpc.call_instance_info(instance.primary_node,
4483                                                 instance.name,
4484                                                 instance.hypervisor)
4485       remote_info.Raise("Error checking node %s" % instance.primary_node,
4486                         prereq=True, ecode=errors.ECODE_ENVIRON)
4487       if not remote_info.payload: # not running already
4488         _CheckNodeFreeMemory(self, instance.primary_node,
4489                              "starting instance %s" % instance.name,
4490                              bep[constants.BE_MEMORY], instance.hypervisor)
4491
4492   def Exec(self, feedback_fn):
4493     """Start the instance.
4494
4495     """
4496     instance = self.instance
4497     force = self.op.force
4498
4499     self.cfg.MarkInstanceUp(instance.name)
4500
4501     if self.primary_offline:
4502       assert self.op.ignore_offline_nodes
4503       self.proc.LogInfo("Primary node offline, marked instance as started")
4504     else:
4505       node_current = instance.primary_node
4506
4507       _StartInstanceDisks(self, instance, force)
4508
4509       result = self.rpc.call_instance_start(node_current, instance,
4510                                             self.op.hvparams, self.op.beparams)
4511       msg = result.fail_msg
4512       if msg:
4513         _ShutdownInstanceDisks(self, instance)
4514         raise errors.OpExecError("Could not start instance: %s" % msg)
4515
4516
4517 class LURebootInstance(LogicalUnit):
4518   """Reboot an instance.
4519
4520   """
4521   HPATH = "instance-reboot"
4522   HTYPE = constants.HTYPE_INSTANCE
4523   _OP_PARAMS = [
4524     _PInstanceName,
4525     ("ignore_secondaries", False, ht.TBool),
4526     ("reboot_type", ht.NoDefault, ht.TElemOf(constants.REBOOT_TYPES)),
4527     _PShutdownTimeout,
4528     ]
4529   REQ_BGL = False
4530
4531   def ExpandNames(self):
4532     self._ExpandAndLockInstance()
4533
4534   def BuildHooksEnv(self):
4535     """Build hooks env.
4536
4537     This runs on master, primary and secondary nodes of the instance.
4538
4539     """
4540     env = {
4541       "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4542       "REBOOT_TYPE": self.op.reboot_type,
4543       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
4544       }
4545     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4546     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4547     return env, nl, nl
4548
4549   def CheckPrereq(self):
4550     """Check prerequisites.
4551
4552     This checks that the instance is in the cluster.
4553
4554     """
4555     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4556     assert self.instance is not None, \
4557       "Cannot retrieve locked instance %s" % self.op.instance_name
4558
4559     _CheckNodeOnline(self, instance.primary_node)
4560
4561     # check bridges existence
4562     _CheckInstanceBridgesExist(self, instance)
4563
4564   def Exec(self, feedback_fn):
4565     """Reboot the instance.
4566
4567     """
4568     instance = self.instance
4569     ignore_secondaries = self.op.ignore_secondaries
4570     reboot_type = self.op.reboot_type
4571
4572     node_current = instance.primary_node
4573
4574     if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4575                        constants.INSTANCE_REBOOT_HARD]:
4576       for disk in instance.disks:
4577         self.cfg.SetDiskID(disk, node_current)
4578       result = self.rpc.call_instance_reboot(node_current, instance,
4579                                              reboot_type,
4580                                              self.op.shutdown_timeout)
4581       result.Raise("Could not reboot instance")
4582     else:
4583       result = self.rpc.call_instance_shutdown(node_current, instance,
4584                                                self.op.shutdown_timeout)
4585       result.Raise("Could not shutdown instance for full reboot")
4586       _ShutdownInstanceDisks(self, instance)
4587       _StartInstanceDisks(self, instance, ignore_secondaries)
4588       result = self.rpc.call_instance_start(node_current, instance, None, None)
4589       msg = result.fail_msg
4590       if msg:
4591         _ShutdownInstanceDisks(self, instance)
4592         raise errors.OpExecError("Could not start instance for"
4593                                  " full reboot: %s" % msg)
4594
4595     self.cfg.MarkInstanceUp(instance.name)
4596
4597
4598 class LUShutdownInstance(LogicalUnit):
4599   """Shutdown an instance.
4600
4601   """
4602   HPATH = "instance-stop"
4603   HTYPE = constants.HTYPE_INSTANCE
4604   _OP_PARAMS = [
4605     _PInstanceName,
4606     _PIgnoreOfflineNodes,
4607     ("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT, ht.TPositiveInt),
4608     ]
4609   REQ_BGL = False
4610
4611   def ExpandNames(self):
4612     self._ExpandAndLockInstance()
4613
4614   def BuildHooksEnv(self):
4615     """Build hooks env.
4616
4617     This runs on master, primary and secondary nodes of the instance.
4618
4619     """
4620     env = _BuildInstanceHookEnvByObject(self, self.instance)
4621     env["TIMEOUT"] = self.op.timeout
4622     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4623     return env, nl, nl
4624
4625   def CheckPrereq(self):
4626     """Check prerequisites.
4627
4628     This checks that the instance is in the cluster.
4629
4630     """
4631     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4632     assert self.instance is not None, \
4633       "Cannot retrieve locked instance %s" % self.op.instance_name
4634
4635     self.primary_offline = \
4636       self.cfg.GetNodeInfo(self.instance.primary_node).offline
4637
4638     if self.primary_offline and self.op.ignore_offline_nodes:
4639       self.proc.LogWarning("Ignoring offline primary node")
4640     else:
4641       _CheckNodeOnline(self, self.instance.primary_node)
4642
4643   def Exec(self, feedback_fn):
4644     """Shutdown the instance.
4645
4646     """
4647     instance = self.instance
4648     node_current = instance.primary_node
4649     timeout = self.op.timeout
4650
4651     self.cfg.MarkInstanceDown(instance.name)
4652
4653     if self.primary_offline:
4654       assert self.op.ignore_offline_nodes
4655       self.proc.LogInfo("Primary node offline, marked instance as stopped")
4656     else:
4657       result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4658       msg = result.fail_msg
4659       if msg:
4660         self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4661
4662       _ShutdownInstanceDisks(self, instance)
4663
4664
4665 class LUReinstallInstance(LogicalUnit):
4666   """Reinstall an instance.
4667
4668   """
4669   HPATH = "instance-reinstall"
4670   HTYPE = constants.HTYPE_INSTANCE
4671   _OP_PARAMS = [
4672     _PInstanceName,
4673     ("os_type", None, ht.TMaybeString),
4674     ("force_variant", False, ht.TBool),
4675     ]
4676   REQ_BGL = False
4677
4678   def ExpandNames(self):
4679     self._ExpandAndLockInstance()
4680
4681   def BuildHooksEnv(self):
4682     """Build hooks env.
4683
4684     This runs on master, primary and secondary nodes of the instance.
4685
4686     """
4687     env = _BuildInstanceHookEnvByObject(self, self.instance)
4688     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4689     return env, nl, nl
4690
4691   def CheckPrereq(self):
4692     """Check prerequisites.
4693
4694     This checks that the instance is in the cluster and is not running.
4695
4696     """
4697     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4698     assert instance is not None, \
4699       "Cannot retrieve locked instance %s" % self.op.instance_name
4700     _CheckNodeOnline(self, instance.primary_node)
4701
4702     if instance.disk_template == constants.DT_DISKLESS:
4703       raise errors.OpPrereqError("Instance '%s' has no disks" %
4704                                  self.op.instance_name,
4705                                  errors.ECODE_INVAL)
4706     _CheckInstanceDown(self, instance, "cannot reinstall")
4707
4708     if self.op.os_type is not None:
4709       # OS verification
4710       pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4711       _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4712
4713     self.instance = instance
4714
4715   def Exec(self, feedback_fn):
4716     """Reinstall the instance.
4717
4718     """
4719     inst = self.instance
4720
4721     if self.op.os_type is not None:
4722       feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4723       inst.os = self.op.os_type
4724       self.cfg.Update(inst, feedback_fn)
4725
4726     _StartInstanceDisks(self, inst, None)
4727     try:
4728       feedback_fn("Running the instance OS create scripts...")
4729       # FIXME: pass debug option from opcode to backend
4730       result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4731                                              self.op.debug_level)
4732       result.Raise("Could not install OS for instance %s on node %s" %
4733                    (inst.name, inst.primary_node))
4734     finally:
4735       _ShutdownInstanceDisks(self, inst)
4736
4737
4738 class LURecreateInstanceDisks(LogicalUnit):
4739   """Recreate an instance's missing disks.
4740
4741   """
4742   HPATH = "instance-recreate-disks"
4743   HTYPE = constants.HTYPE_INSTANCE
4744   _OP_PARAMS = [
4745     _PInstanceName,
4746     ("disks", ht.EmptyList, ht.TListOf(ht.TPositiveInt)),
4747     ]
4748   REQ_BGL = False
4749
4750   def ExpandNames(self):
4751     self._ExpandAndLockInstance()
4752
4753   def BuildHooksEnv(self):
4754     """Build hooks env.
4755
4756     This runs on master, primary and secondary nodes of the instance.
4757
4758     """
4759     env = _BuildInstanceHookEnvByObject(self, self.instance)
4760     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4761     return env, nl, nl
4762
4763   def CheckPrereq(self):
4764     """Check prerequisites.
4765
4766     This checks that the instance is in the cluster and is not running.
4767
4768     """
4769     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4770     assert instance is not None, \
4771       "Cannot retrieve locked instance %s" % self.op.instance_name
4772     _CheckNodeOnline(self, instance.primary_node)
4773
4774     if instance.disk_template == constants.DT_DISKLESS:
4775       raise errors.OpPrereqError("Instance '%s' has no disks" %
4776                                  self.op.instance_name, errors.ECODE_INVAL)
4777     _CheckInstanceDown(self, instance, "cannot recreate disks")
4778
4779     if not self.op.disks:
4780       self.op.disks = range(len(instance.disks))
4781     else:
4782       for idx in self.op.disks:
4783         if idx >= len(instance.disks):
4784           raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4785                                      errors.ECODE_INVAL)
4786
4787     self.instance = instance
4788
4789   def Exec(self, feedback_fn):
4790     """Recreate the disks.
4791
4792     """
4793     to_skip = []
4794     for idx, _ in enumerate(self.instance.disks):
4795       if idx not in self.op.disks: # disk idx has not been passed in
4796         to_skip.append(idx)
4797         continue
4798
4799     _CreateDisks(self, self.instance, to_skip=to_skip)
4800
4801
4802 class LURenameInstance(LogicalUnit):
4803   """Rename an instance.
4804
4805   """
4806   HPATH = "instance-rename"
4807   HTYPE = constants.HTYPE_INSTANCE
4808   _OP_PARAMS = [
4809     _PInstanceName,
4810     ("new_name", ht.NoDefault, ht.TNonEmptyString),
4811     ("ip_check", False, ht.TBool),
4812     ("name_check", True, ht.TBool),
4813     ]
4814
4815   def CheckArguments(self):
4816     """Check arguments.
4817
4818     """
4819     if self.op.ip_check and not self.op.name_check:
4820       # TODO: make the ip check more flexible and not depend on the name check
4821       raise errors.OpPrereqError("Cannot do ip check without a name check",
4822                                  errors.ECODE_INVAL)
4823
4824   def BuildHooksEnv(self):
4825     """Build hooks env.
4826
4827     This runs on master, primary and secondary nodes of the instance.
4828
4829     """
4830     env = _BuildInstanceHookEnvByObject(self, self.instance)
4831     env["INSTANCE_NEW_NAME"] = self.op.new_name
4832     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4833     return env, nl, nl
4834
4835   def CheckPrereq(self):
4836     """Check prerequisites.
4837
4838     This checks that the instance is in the cluster and is not running.
4839
4840     """
4841     self.op.instance_name = _ExpandInstanceName(self.cfg,
4842                                                 self.op.instance_name)
4843     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4844     assert instance is not None
4845     _CheckNodeOnline(self, instance.primary_node)
4846     _CheckInstanceDown(self, instance, "cannot rename")
4847     self.instance = instance
4848
4849     new_name = self.op.new_name
4850     if self.op.name_check:
4851       hostname = netutils.GetHostname(name=new_name)
4852       new_name = self.op.new_name = hostname.name
4853       if (self.op.ip_check and
4854           netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
4855         raise errors.OpPrereqError("IP %s of instance %s already in use" %
4856                                    (hostname.ip, new_name),
4857                                    errors.ECODE_NOTUNIQUE)
4858
4859     instance_list = self.cfg.GetInstanceList()
4860     if new_name in instance_list:
4861       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4862                                  new_name, errors.ECODE_EXISTS)
4863
4864   def Exec(self, feedback_fn):
4865     """Reinstall the instance.
4866
4867     """
4868     inst = self.instance
4869     old_name = inst.name
4870
4871     if inst.disk_template == constants.DT_FILE:
4872       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4873
4874     self.cfg.RenameInstance(inst.name, self.op.new_name)
4875     # Change the instance lock. This is definitely safe while we hold the BGL
4876     self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4877     self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4878
4879     # re-read the instance from the configuration after rename
4880     inst = self.cfg.GetInstanceInfo(self.op.new_name)
4881
4882     if inst.disk_template == constants.DT_FILE:
4883       new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4884       result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4885                                                      old_file_storage_dir,
4886                                                      new_file_storage_dir)
4887       result.Raise("Could not rename on node %s directory '%s' to '%s'"
4888                    " (but the instance has been renamed in Ganeti)" %
4889                    (inst.primary_node, old_file_storage_dir,
4890                     new_file_storage_dir))
4891
4892     _StartInstanceDisks(self, inst, None)
4893     try:
4894       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4895                                                  old_name, self.op.debug_level)
4896       msg = result.fail_msg
4897       if msg:
4898         msg = ("Could not run OS rename script for instance %s on node %s"
4899                " (but the instance has been renamed in Ganeti): %s" %
4900                (inst.name, inst.primary_node, msg))
4901         self.proc.LogWarning(msg)
4902     finally:
4903       _ShutdownInstanceDisks(self, inst)
4904
4905     return inst.name
4906
4907
4908 class LURemoveInstance(LogicalUnit):
4909   """Remove an instance.
4910
4911   """
4912   HPATH = "instance-remove"
4913   HTYPE = constants.HTYPE_INSTANCE
4914   _OP_PARAMS = [
4915     _PInstanceName,
4916     ("ignore_failures", False, ht.TBool),
4917     _PShutdownTimeout,
4918     ]
4919   REQ_BGL = False
4920
4921   def ExpandNames(self):
4922     self._ExpandAndLockInstance()
4923     self.needed_locks[locking.LEVEL_NODE] = []
4924     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4925
4926   def DeclareLocks(self, level):
4927     if level == locking.LEVEL_NODE:
4928       self._LockInstancesNodes()
4929
4930   def BuildHooksEnv(self):
4931     """Build hooks env.
4932
4933     This runs on master, primary and secondary nodes of the instance.
4934
4935     """
4936     env = _BuildInstanceHookEnvByObject(self, self.instance)
4937     env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
4938     nl = [self.cfg.GetMasterNode()]
4939     nl_post = list(self.instance.all_nodes) + nl
4940     return env, nl, nl_post
4941
4942   def CheckPrereq(self):
4943     """Check prerequisites.
4944
4945     This checks that the instance is in the cluster.
4946
4947     """
4948     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4949     assert self.instance is not None, \
4950       "Cannot retrieve locked instance %s" % self.op.instance_name
4951
4952   def Exec(self, feedback_fn):
4953     """Remove the instance.
4954
4955     """
4956     instance = self.instance
4957     logging.info("Shutting down instance %s on node %s",
4958                  instance.name, instance.primary_node)
4959
4960     result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4961                                              self.op.shutdown_timeout)
4962     msg = result.fail_msg
4963     if msg:
4964       if self.op.ignore_failures:
4965         feedback_fn("Warning: can't shutdown instance: %s" % msg)
4966       else:
4967         raise errors.OpExecError("Could not shutdown instance %s on"
4968                                  " node %s: %s" %
4969                                  (instance.name, instance.primary_node, msg))
4970
4971     _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
4972
4973
4974 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
4975   """Utility function to remove an instance.
4976
4977   """
4978   logging.info("Removing block devices for instance %s", instance.name)
4979
4980   if not _RemoveDisks(lu, instance):
4981     if not ignore_failures:
4982       raise errors.OpExecError("Can't remove instance's disks")
4983     feedback_fn("Warning: can't remove instance's disks")
4984
4985   logging.info("Removing instance %s out of cluster config", instance.name)
4986
4987   lu.cfg.RemoveInstance(instance.name)
4988
4989   assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
4990     "Instance lock removal conflict"
4991
4992   # Remove lock for the instance
4993   lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4994
4995
4996 class LUQueryInstances(NoHooksLU):
4997   """Logical unit for querying instances.
4998
4999   """
5000   # pylint: disable-msg=W0142
5001   _OP_PARAMS = [
5002     ("output_fields", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
5003     ("names", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
5004     ("use_locking", False, ht.TBool),
5005     ]
5006   REQ_BGL = False
5007   _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
5008                     "serial_no", "ctime", "mtime", "uuid"]
5009   _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
5010                                     "admin_state",
5011                                     "disk_template", "ip", "mac", "bridge",
5012                                     "nic_mode", "nic_link",
5013                                     "sda_size", "sdb_size", "vcpus", "tags",
5014                                     "network_port", "beparams",
5015                                     r"(disk)\.(size)/([0-9]+)",
5016                                     r"(disk)\.(sizes)", "disk_usage",
5017                                     r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
5018                                     r"(nic)\.(bridge)/([0-9]+)",
5019                                     r"(nic)\.(macs|ips|modes|links|bridges)",
5020                                     r"(disk|nic)\.(count)",
5021                                     "hvparams", "custom_hvparams",
5022                                     "custom_beparams", "custom_nicparams",
5023                                     ] + _SIMPLE_FIELDS +
5024                                   ["hv/%s" % name
5025                                    for name in constants.HVS_PARAMETERS
5026                                    if name not in constants.HVC_GLOBALS] +
5027                                   ["be/%s" % name
5028                                    for name in constants.BES_PARAMETERS])
5029   _FIELDS_DYNAMIC = utils.FieldSet("oper_state",
5030                                    "oper_ram",
5031                                    "oper_vcpus",
5032                                    "status")
5033
5034
5035   def CheckArguments(self):
5036     _CheckOutputFields(static=self._FIELDS_STATIC,
5037                        dynamic=self._FIELDS_DYNAMIC,
5038                        selected=self.op.output_fields)
5039
5040   def ExpandNames(self):
5041     self.needed_locks = {}
5042     self.share_locks[locking.LEVEL_INSTANCE] = 1
5043     self.share_locks[locking.LEVEL_NODE] = 1
5044
5045     if self.op.names:
5046       self.wanted = _GetWantedInstances(self, self.op.names)
5047     else:
5048       self.wanted = locking.ALL_SET
5049
5050     self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
5051     self.do_locking = self.do_node_query and self.op.use_locking
5052     if self.do_locking:
5053       self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
5054       self.needed_locks[locking.LEVEL_NODE] = []
5055       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5056
5057   def DeclareLocks(self, level):
5058     if level == locking.LEVEL_NODE and self.do_locking:
5059       self._LockInstancesNodes()
5060
5061   def Exec(self, feedback_fn):
5062     """Computes the list of nodes and their attributes.
5063
5064     """
5065     # pylint: disable-msg=R0912
5066     # way too many branches here
5067     all_info = self.cfg.GetAllInstancesInfo()
5068     if self.wanted == locking.ALL_SET:
5069       # caller didn't specify instance names, so ordering is not important
5070       if self.do_locking:
5071         instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
5072       else:
5073         instance_names = all_info.keys()
5074       instance_names = utils.NiceSort(instance_names)
5075     else:
5076       # caller did specify names, so we must keep the ordering
5077       if self.do_locking:
5078         tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
5079       else:
5080         tgt_set = all_info.keys()
5081       missing = set(self.wanted).difference(tgt_set)
5082       if missing:
5083         raise errors.OpExecError("Some instances were removed before"
5084                                  " retrieving their data: %s" % missing)
5085       instance_names = self.wanted
5086
5087     instance_list = [all_info[iname] for iname in instance_names]
5088
5089     # begin data gathering
5090
5091     nodes = frozenset([inst.primary_node for inst in instance_list])
5092     hv_list = list(set([inst.hypervisor for inst in instance_list]))
5093
5094     bad_nodes = []
5095     off_nodes = []
5096     if self.do_node_query:
5097       live_data = {}
5098       node_data = self.rpc.call_all_instances_info(nodes, hv_list)
5099       for name in nodes:
5100         result = node_data[name]
5101         if result.offline:
5102           # offline nodes will be in both lists
5103           off_nodes.append(name)
5104         if result.fail_msg:
5105           bad_nodes.append(name)
5106         else:
5107           if result.payload:
5108             live_data.update(result.payload)
5109           # else no instance is alive
5110     else:
5111       live_data = dict([(name, {}) for name in instance_names])
5112
5113     # end data gathering
5114
5115     HVPREFIX = "hv/"
5116     BEPREFIX = "be/"
5117     output = []
5118     cluster = self.cfg.GetClusterInfo()
5119     for instance in instance_list:
5120       iout = []
5121       i_hv = cluster.FillHV(instance, skip_globals=True)
5122       i_be = cluster.FillBE(instance)
5123       i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
5124       for field in self.op.output_fields:
5125         st_match = self._FIELDS_STATIC.Matches(field)
5126         if field in self._SIMPLE_FIELDS:
5127           val = getattr(instance, field)
5128         elif field == "pnode":
5129           val = instance.primary_node
5130         elif field == "snodes":
5131           val = list(instance.secondary_nodes)
5132         elif field == "admin_state":
5133           val = instance.admin_up
5134         elif field == "oper_state":
5135           if instance.primary_node in bad_nodes:
5136             val = None
5137           else:
5138             val = bool(live_data.get(instance.name))
5139         elif field == "status":
5140           if instance.primary_node in off_nodes:
5141             val = "ERROR_nodeoffline"
5142           elif instance.primary_node in bad_nodes:
5143             val = "ERROR_nodedown"
5144           else:
5145             running = bool(live_data.get(instance.name))
5146             if running:
5147               if instance.admin_up:
5148                 val = "running"
5149               else:
5150                 val = "ERROR_up"
5151             else:
5152               if instance.admin_up:
5153                 val = "ERROR_down"
5154               else:
5155                 val = "ADMIN_down"
5156         elif field == "oper_ram":
5157           if instance.primary_node in bad_nodes:
5158             val = None
5159           elif instance.name in live_data:
5160             val = live_data[instance.name].get("memory", "?")
5161           else:
5162             val = "-"
5163         elif field == "oper_vcpus":
5164           if instance.primary_node in bad_nodes:
5165             val = None
5166           elif instance.name in live_data:
5167             val = live_data[instance.name].get("vcpus", "?")
5168           else:
5169             val = "-"
5170         elif field == "vcpus":
5171           val = i_be[constants.BE_VCPUS]
5172         elif field == "disk_template":
5173           val = instance.disk_template
5174         elif field == "ip":
5175           if instance.nics:
5176             val = instance.nics[0].ip
5177           else:
5178             val = None
5179         elif field == "nic_mode":
5180           if instance.nics:
5181             val = i_nicp[0][constants.NIC_MODE]
5182           else:
5183             val = None
5184         elif field == "nic_link":
5185           if instance.nics:
5186             val = i_nicp[0][constants.NIC_LINK]
5187           else:
5188             val = None
5189         elif field == "bridge":
5190           if (instance.nics and
5191               i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
5192             val = i_nicp[0][constants.NIC_LINK]
5193           else:
5194             val = None
5195         elif field == "mac":
5196           if instance.nics:
5197             val = instance.nics[0].mac
5198           else:
5199             val = None
5200         elif field == "custom_nicparams":
5201           val = [nic.nicparams for nic in instance.nics]
5202         elif field == "sda_size" or field == "sdb_size":
5203           idx = ord(field[2]) - ord('a')
5204           try:
5205             val = instance.FindDisk(idx).size
5206           except errors.OpPrereqError:
5207             val = None
5208         elif field == "disk_usage": # total disk usage per node
5209           disk_sizes = [{'size': disk.size} for disk in instance.disks]
5210           val = _ComputeDiskSize(instance.disk_template, disk_sizes)
5211         elif field == "tags":
5212           val = list(instance.GetTags())
5213         elif field == "custom_hvparams":
5214           val = instance.hvparams # not filled!
5215         elif field == "hvparams":
5216           val = i_hv
5217         elif (field.startswith(HVPREFIX) and
5218               field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
5219               field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
5220           val = i_hv.get(field[len(HVPREFIX):], None)
5221         elif field == "custom_beparams":
5222           val = instance.beparams
5223         elif field == "beparams":
5224           val = i_be
5225         elif (field.startswith(BEPREFIX) and
5226               field[len(BEPREFIX):] in constants.BES_PARAMETERS):
5227           val = i_be.get(field[len(BEPREFIX):], None)
5228         elif st_match and st_match.groups():
5229           # matches a variable list
5230           st_groups = st_match.groups()
5231           if st_groups and st_groups[0] == "disk":
5232             if st_groups[1] == "count":
5233               val = len(instance.disks)
5234             elif st_groups[1] == "sizes":
5235               val = [disk.size for disk in instance.disks]
5236             elif st_groups[1] == "size":
5237               try:
5238                 val = instance.FindDisk(st_groups[2]).size
5239               except errors.OpPrereqError:
5240                 val = None
5241             else:
5242               assert False, "Unhandled disk parameter"
5243           elif st_groups[0] == "nic":
5244             if st_groups[1] == "count":
5245               val = len(instance.nics)
5246             elif st_groups[1] == "macs":
5247               val = [nic.mac for nic in instance.nics]
5248             elif st_groups[1] == "ips":
5249               val = [nic.ip for nic in instance.nics]
5250             elif st_groups[1] == "modes":
5251               val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
5252             elif st_groups[1] == "links":
5253               val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
5254             elif st_groups[1] == "bridges":
5255               val = []
5256               for nicp in i_nicp:
5257                 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
5258                   val.append(nicp[constants.NIC_LINK])
5259                 else:
5260                   val.append(None)
5261             else:
5262               # index-based item
5263               nic_idx = int(st_groups[2])
5264               if nic_idx >= len(instance.nics):
5265                 val = None
5266               else:
5267                 if st_groups[1] == "mac":
5268                   val = instance.nics[nic_idx].mac
5269                 elif st_groups[1] == "ip":
5270                   val = instance.nics[nic_idx].ip
5271                 elif st_groups[1] == "mode":
5272                   val = i_nicp[nic_idx][constants.NIC_MODE]
5273                 elif st_groups[1] == "link":
5274                   val = i_nicp[nic_idx][constants.NIC_LINK]
5275                 elif st_groups[1] == "bridge":
5276                   nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
5277                   if nic_mode == constants.NIC_MODE_BRIDGED:
5278                     val = i_nicp[nic_idx][constants.NIC_LINK]
5279                   else:
5280                     val = None
5281                 else:
5282                   assert False, "Unhandled NIC parameter"
5283           else:
5284             assert False, ("Declared but unhandled variable parameter '%s'" %
5285                            field)
5286         else:
5287           assert False, "Declared but unhandled parameter '%s'" % field
5288         iout.append(val)
5289       output.append(iout)
5290
5291     return output
5292
5293
5294 class LUFailoverInstance(LogicalUnit):
5295   """Failover an instance.
5296
5297   """
5298   HPATH = "instance-failover"
5299   HTYPE = constants.HTYPE_INSTANCE
5300   _OP_PARAMS = [
5301     _PInstanceName,
5302     ("ignore_consistency", False, ht.TBool),
5303     _PShutdownTimeout,
5304     ]
5305   REQ_BGL = False
5306
5307   def ExpandNames(self):
5308     self._ExpandAndLockInstance()
5309     self.needed_locks[locking.LEVEL_NODE] = []
5310     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5311
5312   def DeclareLocks(self, level):
5313     if level == locking.LEVEL_NODE:
5314       self._LockInstancesNodes()
5315
5316   def BuildHooksEnv(self):
5317     """Build hooks env.
5318
5319     This runs on master, primary and secondary nodes of the instance.
5320
5321     """
5322     instance = self.instance
5323     source_node = instance.primary_node
5324     target_node = instance.secondary_nodes[0]
5325     env = {
5326       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5327       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5328       "OLD_PRIMARY": source_node,
5329       "OLD_SECONDARY": target_node,
5330       "NEW_PRIMARY": target_node,
5331       "NEW_SECONDARY": source_node,
5332       }
5333     env.update(_BuildInstanceHookEnvByObject(self, instance))
5334     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5335     nl_post = list(nl)
5336     nl_post.append(source_node)
5337     return env, nl, nl_post
5338
5339   def CheckPrereq(self):
5340     """Check prerequisites.
5341
5342     This checks that the instance is in the cluster.
5343
5344     """
5345     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5346     assert self.instance is not None, \
5347       "Cannot retrieve locked instance %s" % self.op.instance_name
5348
5349     bep = self.cfg.GetClusterInfo().FillBE(instance)
5350     if instance.disk_template not in constants.DTS_NET_MIRROR:
5351       raise errors.OpPrereqError("Instance's disk layout is not"
5352                                  " network mirrored, cannot failover.",
5353                                  errors.ECODE_STATE)
5354
5355     secondary_nodes = instance.secondary_nodes
5356     if not secondary_nodes:
5357       raise errors.ProgrammerError("no secondary node but using "
5358                                    "a mirrored disk template")
5359
5360     target_node = secondary_nodes[0]
5361     _CheckNodeOnline(self, target_node)
5362     _CheckNodeNotDrained(self, target_node)
5363     if instance.admin_up:
5364       # check memory requirements on the secondary node
5365       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5366                            instance.name, bep[constants.BE_MEMORY],
5367                            instance.hypervisor)
5368     else:
5369       self.LogInfo("Not checking memory on the secondary node as"
5370                    " instance will not be started")
5371
5372     # check bridge existance
5373     _CheckInstanceBridgesExist(self, instance, node=target_node)
5374
5375   def Exec(self, feedback_fn):
5376     """Failover an instance.
5377
5378     The failover is done by shutting it down on its present node and
5379     starting it on the secondary.
5380
5381     """
5382     instance = self.instance
5383     primary_node = self.cfg.GetNodeInfo(instance.primary_node)
5384
5385     source_node = instance.primary_node
5386     target_node = instance.secondary_nodes[0]
5387
5388     if instance.admin_up:
5389       feedback_fn("* checking disk consistency between source and target")
5390       for dev in instance.disks:
5391         # for drbd, these are drbd over lvm
5392         if not _CheckDiskConsistency(self, dev, target_node, False):
5393           if not self.op.ignore_consistency:
5394             raise errors.OpExecError("Disk %s is degraded on target node,"
5395                                      " aborting failover." % dev.iv_name)
5396     else:
5397       feedback_fn("* not checking disk consistency as instance is not running")
5398
5399     feedback_fn("* shutting down instance on source node")
5400     logging.info("Shutting down instance %s on node %s",
5401                  instance.name, source_node)
5402
5403     result = self.rpc.call_instance_shutdown(source_node, instance,
5404                                              self.op.shutdown_timeout)
5405     msg = result.fail_msg
5406     if msg:
5407       if self.op.ignore_consistency or primary_node.offline:
5408         self.proc.LogWarning("Could not shutdown instance %s on node %s."
5409                              " Proceeding anyway. Please make sure node"
5410                              " %s is down. Error details: %s",
5411                              instance.name, source_node, source_node, msg)
5412       else:
5413         raise errors.OpExecError("Could not shutdown instance %s on"
5414                                  " node %s: %s" %
5415                                  (instance.name, source_node, msg))
5416
5417     feedback_fn("* deactivating the instance's disks on source node")
5418     if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5419       raise errors.OpExecError("Can't shut down the instance's disks.")
5420
5421     instance.primary_node = target_node
5422     # distribute new instance config to the other nodes
5423     self.cfg.Update(instance, feedback_fn)
5424
5425     # Only start the instance if it's marked as up
5426     if instance.admin_up:
5427       feedback_fn("* activating the instance's disks on target node")
5428       logging.info("Starting instance %s on node %s",
5429                    instance.name, target_node)
5430
5431       disks_ok, _ = _AssembleInstanceDisks(self, instance,
5432                                            ignore_secondaries=True)
5433       if not disks_ok:
5434         _ShutdownInstanceDisks(self, instance)
5435         raise errors.OpExecError("Can't activate the instance's disks")
5436
5437       feedback_fn("* starting the instance on the target node")
5438       result = self.rpc.call_instance_start(target_node, instance, None, None)
5439       msg = result.fail_msg
5440       if msg:
5441         _ShutdownInstanceDisks(self, instance)
5442         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5443                                  (instance.name, target_node, msg))
5444
5445
5446 class LUMigrateInstance(LogicalUnit):
5447   """Migrate an instance.
5448
5449   This is migration without shutting down, compared to the failover,
5450   which is done with shutdown.
5451
5452   """
5453   HPATH = "instance-migrate"
5454   HTYPE = constants.HTYPE_INSTANCE
5455   _OP_PARAMS = [
5456     _PInstanceName,
5457     _PMigrationMode,
5458     _PMigrationLive,
5459     ("cleanup", False, ht.TBool),
5460     ]
5461
5462   REQ_BGL = False
5463
5464   def ExpandNames(self):
5465     self._ExpandAndLockInstance()
5466
5467     self.needed_locks[locking.LEVEL_NODE] = []
5468     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5469
5470     self._migrater = TLMigrateInstance(self, self.op.instance_name,
5471                                        self.op.cleanup)
5472     self.tasklets = [self._migrater]
5473
5474   def DeclareLocks(self, level):
5475     if level == locking.LEVEL_NODE:
5476       self._LockInstancesNodes()
5477
5478   def BuildHooksEnv(self):
5479     """Build hooks env.
5480
5481     This runs on master, primary and secondary nodes of the instance.
5482
5483     """
5484     instance = self._migrater.instance
5485     source_node = instance.primary_node
5486     target_node = instance.secondary_nodes[0]
5487     env = _BuildInstanceHookEnvByObject(self, instance)
5488     env["MIGRATE_LIVE"] = self._migrater.live
5489     env["MIGRATE_CLEANUP"] = self.op.cleanup
5490     env.update({
5491         "OLD_PRIMARY": source_node,
5492         "OLD_SECONDARY": target_node,
5493         "NEW_PRIMARY": target_node,
5494         "NEW_SECONDARY": source_node,
5495         })
5496     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5497     nl_post = list(nl)
5498     nl_post.append(source_node)
5499     return env, nl, nl_post
5500
5501
5502 class LUMoveInstance(LogicalUnit):
5503   """Move an instance by data-copying.
5504
5505   """
5506   HPATH = "instance-move"
5507   HTYPE = constants.HTYPE_INSTANCE
5508   _OP_PARAMS = [
5509     _PInstanceName,
5510     ("target_node", ht.NoDefault, ht.TNonEmptyString),
5511     _PShutdownTimeout,
5512     ]
5513   REQ_BGL = False
5514
5515   def ExpandNames(self):
5516     self._ExpandAndLockInstance()
5517     target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5518     self.op.target_node = target_node
5519     self.needed_locks[locking.LEVEL_NODE] = [target_node]
5520     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5521
5522   def DeclareLocks(self, level):
5523     if level == locking.LEVEL_NODE:
5524       self._LockInstancesNodes(primary_only=True)
5525
5526   def BuildHooksEnv(self):
5527     """Build hooks env.
5528
5529     This runs on master, primary and secondary nodes of the instance.
5530
5531     """
5532     env = {
5533       "TARGET_NODE": self.op.target_node,
5534       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5535       }
5536     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5537     nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5538                                        self.op.target_node]
5539     return env, nl, nl
5540
5541   def CheckPrereq(self):
5542     """Check prerequisites.
5543
5544     This checks that the instance is in the cluster.
5545
5546     """
5547     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5548     assert self.instance is not None, \
5549       "Cannot retrieve locked instance %s" % self.op.instance_name
5550
5551     node = self.cfg.GetNodeInfo(self.op.target_node)
5552     assert node is not None, \
5553       "Cannot retrieve locked node %s" % self.op.target_node
5554
5555     self.target_node = target_node = node.name
5556
5557     if target_node == instance.primary_node:
5558       raise errors.OpPrereqError("Instance %s is already on the node %s" %
5559                                  (instance.name, target_node),
5560                                  errors.ECODE_STATE)
5561
5562     bep = self.cfg.GetClusterInfo().FillBE(instance)
5563
5564     for idx, dsk in enumerate(instance.disks):
5565       if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5566         raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5567                                    " cannot copy" % idx, errors.ECODE_STATE)
5568
5569     _CheckNodeOnline(self, target_node)
5570     _CheckNodeNotDrained(self, target_node)
5571
5572     if instance.admin_up:
5573       # check memory requirements on the secondary node
5574       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5575                            instance.name, bep[constants.BE_MEMORY],
5576                            instance.hypervisor)
5577     else:
5578       self.LogInfo("Not checking memory on the secondary node as"
5579                    " instance will not be started")
5580
5581     # check bridge existance
5582     _CheckInstanceBridgesExist(self, instance, node=target_node)
5583
5584   def Exec(self, feedback_fn):
5585     """Move an instance.
5586
5587     The move is done by shutting it down on its present node, copying
5588     the data over (slow) and starting it on the new node.
5589
5590     """
5591     instance = self.instance
5592
5593     source_node = instance.primary_node
5594     target_node = self.target_node
5595
5596     self.LogInfo("Shutting down instance %s on source node %s",
5597                  instance.name, source_node)
5598
5599     result = self.rpc.call_instance_shutdown(source_node, instance,
5600                                              self.op.shutdown_timeout)
5601     msg = result.fail_msg
5602     if msg:
5603       if self.op.ignore_consistency:
5604         self.proc.LogWarning("Could not shutdown instance %s on node %s."
5605                              " Proceeding anyway. Please make sure node"
5606                              " %s is down. Error details: %s",
5607                              instance.name, source_node, source_node, msg)
5608       else:
5609         raise errors.OpExecError("Could not shutdown instance %s on"
5610                                  " node %s: %s" %
5611                                  (instance.name, source_node, msg))
5612
5613     # create the target disks
5614     try:
5615       _CreateDisks(self, instance, target_node=target_node)
5616     except errors.OpExecError:
5617       self.LogWarning("Device creation failed, reverting...")
5618       try:
5619         _RemoveDisks(self, instance, target_node=target_node)
5620       finally:
5621         self.cfg.ReleaseDRBDMinors(instance.name)
5622         raise
5623
5624     cluster_name = self.cfg.GetClusterInfo().cluster_name
5625
5626     errs = []
5627     # activate, get path, copy the data over
5628     for idx, disk in enumerate(instance.disks):
5629       self.LogInfo("Copying data for disk %d", idx)
5630       result = self.rpc.call_blockdev_assemble(target_node, disk,
5631                                                instance.name, True)
5632       if result.fail_msg:
5633         self.LogWarning("Can't assemble newly created disk %d: %s",
5634                         idx, result.fail_msg)
5635         errs.append(result.fail_msg)
5636         break
5637       dev_path = result.payload
5638       result = self.rpc.call_blockdev_export(source_node, disk,
5639                                              target_node, dev_path,
5640                                              cluster_name)
5641       if result.fail_msg:
5642         self.LogWarning("Can't copy data over for disk %d: %s",
5643                         idx, result.fail_msg)
5644         errs.append(result.fail_msg)
5645         break
5646
5647     if errs:
5648       self.LogWarning("Some disks failed to copy, aborting")
5649       try:
5650         _RemoveDisks(self, instance, target_node=target_node)
5651       finally:
5652         self.cfg.ReleaseDRBDMinors(instance.name)
5653         raise errors.OpExecError("Errors during disk copy: %s" %
5654                                  (",".join(errs),))
5655
5656     instance.primary_node = target_node
5657     self.cfg.Update(instance, feedback_fn)
5658
5659     self.LogInfo("Removing the disks on the original node")
5660     _RemoveDisks(self, instance, target_node=source_node)
5661
5662     # Only start the instance if it's marked as up
5663     if instance.admin_up:
5664       self.LogInfo("Starting instance %s on node %s",
5665                    instance.name, target_node)
5666
5667       disks_ok, _ = _AssembleInstanceDisks(self, instance,
5668                                            ignore_secondaries=True)
5669       if not disks_ok:
5670         _ShutdownInstanceDisks(self, instance)
5671         raise errors.OpExecError("Can't activate the instance's disks")
5672
5673       result = self.rpc.call_instance_start(target_node, instance, None, None)
5674       msg = result.fail_msg
5675       if msg:
5676         _ShutdownInstanceDisks(self, instance)
5677         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5678                                  (instance.name, target_node, msg))
5679
5680
5681 class LUMigrateNode(LogicalUnit):
5682   """Migrate all instances from a node.
5683
5684   """
5685   HPATH = "node-migrate"
5686   HTYPE = constants.HTYPE_NODE
5687   _OP_PARAMS = [
5688     _PNodeName,
5689     _PMigrationMode,
5690     _PMigrationLive,
5691     ]
5692   REQ_BGL = False
5693
5694   def ExpandNames(self):
5695     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5696
5697     self.needed_locks = {
5698       locking.LEVEL_NODE: [self.op.node_name],
5699       }
5700
5701     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5702
5703     # Create tasklets for migrating instances for all instances on this node
5704     names = []
5705     tasklets = []
5706
5707     for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5708       logging.debug("Migrating instance %s", inst.name)
5709       names.append(inst.name)
5710
5711       tasklets.append(TLMigrateInstance(self, inst.name, False))
5712
5713     self.tasklets = tasklets
5714
5715     # Declare instance locks
5716     self.needed_locks[locking.LEVEL_INSTANCE] = names
5717
5718   def DeclareLocks(self, level):
5719     if level == locking.LEVEL_NODE:
5720       self._LockInstancesNodes()
5721
5722   def BuildHooksEnv(self):
5723     """Build hooks env.
5724
5725     This runs on the master, the primary and all the secondaries.
5726
5727     """
5728     env = {
5729       "NODE_NAME": self.op.node_name,
5730       }
5731
5732     nl = [self.cfg.GetMasterNode()]
5733
5734     return (env, nl, nl)
5735
5736
5737 class TLMigrateInstance(Tasklet):
5738   """Tasklet class for instance migration.
5739
5740   @type live: boolean
5741   @ivar live: whether the migration will be done live or non-live;
5742       this variable is initalized only after CheckPrereq has run
5743
5744   """
5745   def __init__(self, lu, instance_name, cleanup):
5746     """Initializes this class.
5747
5748     """
5749     Tasklet.__init__(self, lu)
5750
5751     # Parameters
5752     self.instance_name = instance_name
5753     self.cleanup = cleanup
5754     self.live = False # will be overridden later
5755
5756   def CheckPrereq(self):
5757     """Check prerequisites.
5758
5759     This checks that the instance is in the cluster.
5760
5761     """
5762     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5763     instance = self.cfg.GetInstanceInfo(instance_name)
5764     assert instance is not None
5765
5766     if instance.disk_template != constants.DT_DRBD8:
5767       raise errors.OpPrereqError("Instance's disk layout is not"
5768                                  " drbd8, cannot migrate.", errors.ECODE_STATE)
5769
5770     secondary_nodes = instance.secondary_nodes
5771     if not secondary_nodes:
5772       raise errors.ConfigurationError("No secondary node but using"
5773                                       " drbd8 disk template")
5774
5775     i_be = self.cfg.GetClusterInfo().FillBE(instance)
5776
5777     target_node = secondary_nodes[0]
5778     # check memory requirements on the secondary node
5779     _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5780                          instance.name, i_be[constants.BE_MEMORY],
5781                          instance.hypervisor)
5782
5783     # check bridge existance
5784     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5785
5786     if not self.cleanup:
5787       _CheckNodeNotDrained(self.lu, target_node)
5788       result = self.rpc.call_instance_migratable(instance.primary_node,
5789                                                  instance)
5790       result.Raise("Can't migrate, please use failover",
5791                    prereq=True, ecode=errors.ECODE_STATE)
5792
5793     self.instance = instance
5794
5795     if self.lu.op.live is not None and self.lu.op.mode is not None:
5796       raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
5797                                  " parameters are accepted",
5798                                  errors.ECODE_INVAL)
5799     if self.lu.op.live is not None:
5800       if self.lu.op.live:
5801         self.lu.op.mode = constants.HT_MIGRATION_LIVE
5802       else:
5803         self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
5804       # reset the 'live' parameter to None so that repeated
5805       # invocations of CheckPrereq do not raise an exception
5806       self.lu.op.live = None
5807     elif self.lu.op.mode is None:
5808       # read the default value from the hypervisor
5809       i_hv = self.cfg.GetClusterInfo().FillHV(instance, skip_globals=False)
5810       self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
5811
5812     self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
5813
5814   def _WaitUntilSync(self):
5815     """Poll with custom rpc for disk sync.
5816
5817     This uses our own step-based rpc call.
5818
5819     """
5820     self.feedback_fn("* wait until resync is done")
5821     all_done = False
5822     while not all_done:
5823       all_done = True
5824       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5825                                             self.nodes_ip,
5826                                             self.instance.disks)
5827       min_percent = 100
5828       for node, nres in result.items():
5829         nres.Raise("Cannot resync disks on node %s" % node)
5830         node_done, node_percent = nres.payload
5831         all_done = all_done and node_done
5832         if node_percent is not None:
5833           min_percent = min(min_percent, node_percent)
5834       if not all_done:
5835         if min_percent < 100:
5836           self.feedback_fn("   - progress: %.1f%%" % min_percent)
5837         time.sleep(2)
5838
5839   def _EnsureSecondary(self, node):
5840     """Demote a node to secondary.
5841
5842     """
5843     self.feedback_fn("* switching node %s to secondary mode" % node)
5844
5845     for dev in self.instance.disks:
5846       self.cfg.SetDiskID(dev, node)
5847
5848     result = self.rpc.call_blockdev_close(node, self.instance.name,
5849                                           self.instance.disks)
5850     result.Raise("Cannot change disk to secondary on node %s" % node)
5851
5852   def _GoStandalone(self):
5853     """Disconnect from the network.
5854
5855     """
5856     self.feedback_fn("* changing into standalone mode")
5857     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5858                                                self.instance.disks)
5859     for node, nres in result.items():
5860       nres.Raise("Cannot disconnect disks node %s" % node)
5861
5862   def _GoReconnect(self, multimaster):
5863     """Reconnect to the network.
5864
5865     """
5866     if multimaster:
5867       msg = "dual-master"
5868     else:
5869       msg = "single-master"
5870     self.feedback_fn("* changing disks into %s mode" % msg)
5871     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5872                                            self.instance.disks,
5873                                            self.instance.name, multimaster)
5874     for node, nres in result.items():
5875       nres.Raise("Cannot change disks config on node %s" % node)
5876
5877   def _ExecCleanup(self):
5878     """Try to cleanup after a failed migration.
5879
5880     The cleanup is done by:
5881       - check that the instance is running only on one node
5882         (and update the config if needed)
5883       - change disks on its secondary node to secondary
5884       - wait until disks are fully synchronized
5885       - disconnect from the network
5886       - change disks into single-master mode
5887       - wait again until disks are fully synchronized
5888
5889     """
5890     instance = self.instance
5891     target_node = self.target_node
5892     source_node = self.source_node
5893
5894     # check running on only one node
5895     self.feedback_fn("* checking where the instance actually runs"
5896                      " (if this hangs, the hypervisor might be in"
5897                      " a bad state)")
5898     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5899     for node, result in ins_l.items():
5900       result.Raise("Can't contact node %s" % node)
5901
5902     runningon_source = instance.name in ins_l[source_node].payload
5903     runningon_target = instance.name in ins_l[target_node].payload
5904
5905     if runningon_source and runningon_target:
5906       raise errors.OpExecError("Instance seems to be running on two nodes,"
5907                                " or the hypervisor is confused. You will have"
5908                                " to ensure manually that it runs only on one"
5909                                " and restart this operation.")
5910
5911     if not (runningon_source or runningon_target):
5912       raise errors.OpExecError("Instance does not seem to be running at all."
5913                                " In this case, it's safer to repair by"
5914                                " running 'gnt-instance stop' to ensure disk"
5915                                " shutdown, and then restarting it.")
5916
5917     if runningon_target:
5918       # the migration has actually succeeded, we need to update the config
5919       self.feedback_fn("* instance running on secondary node (%s),"
5920                        " updating config" % target_node)
5921       instance.primary_node = target_node
5922       self.cfg.Update(instance, self.feedback_fn)
5923       demoted_node = source_node
5924     else:
5925       self.feedback_fn("* instance confirmed to be running on its"
5926                        " primary node (%s)" % source_node)
5927       demoted_node = target_node
5928
5929     self._EnsureSecondary(demoted_node)
5930     try:
5931       self._WaitUntilSync()
5932     except errors.OpExecError:
5933       # we ignore here errors, since if the device is standalone, it
5934       # won't be able to sync
5935       pass
5936     self._GoStandalone()
5937     self._GoReconnect(False)
5938     self._WaitUntilSync()
5939
5940     self.feedback_fn("* done")
5941
5942   def _RevertDiskStatus(self):
5943     """Try to revert the disk status after a failed migration.
5944
5945     """
5946     target_node = self.target_node
5947     try:
5948       self._EnsureSecondary(target_node)
5949       self._GoStandalone()
5950       self._GoReconnect(False)
5951       self._WaitUntilSync()
5952     except errors.OpExecError, err:
5953       self.lu.LogWarning("Migration failed and I can't reconnect the"
5954                          " drives: error '%s'\n"
5955                          "Please look and recover the instance status" %
5956                          str(err))
5957
5958   def _AbortMigration(self):
5959     """Call the hypervisor code to abort a started migration.
5960
5961     """
5962     instance = self.instance
5963     target_node = self.target_node
5964     migration_info = self.migration_info
5965
5966     abort_result = self.rpc.call_finalize_migration(target_node,
5967                                                     instance,
5968                                                     migration_info,
5969                                                     False)
5970     abort_msg = abort_result.fail_msg
5971     if abort_msg:
5972       logging.error("Aborting migration failed on target node %s: %s",
5973                     target_node, abort_msg)
5974       # Don't raise an exception here, as we stil have to try to revert the
5975       # disk status, even if this step failed.
5976
5977   def _ExecMigration(self):
5978     """Migrate an instance.
5979
5980     The migrate is done by:
5981       - change the disks into dual-master mode
5982       - wait until disks are fully synchronized again
5983       - migrate the instance
5984       - change disks on the new secondary node (the old primary) to secondary
5985       - wait until disks are fully synchronized
5986       - change disks into single-master mode
5987
5988     """
5989     instance = self.instance
5990     target_node = self.target_node
5991     source_node = self.source_node
5992
5993     self.feedback_fn("* checking disk consistency between source and target")
5994     for dev in instance.disks:
5995       if not _CheckDiskConsistency(self.lu, dev, target_node, False):
5996         raise errors.OpExecError("Disk %s is degraded or not fully"
5997                                  " synchronized on target node,"
5998                                  " aborting migrate." % dev.iv_name)
5999
6000     # First get the migration information from the remote node
6001     result = self.rpc.call_migration_info(source_node, instance)
6002     msg = result.fail_msg
6003     if msg:
6004       log_err = ("Failed fetching source migration information from %s: %s" %
6005                  (source_node, msg))
6006       logging.error(log_err)
6007       raise errors.OpExecError(log_err)
6008
6009     self.migration_info = migration_info = result.payload
6010
6011     # Then switch the disks to master/master mode
6012     self._EnsureSecondary(target_node)
6013     self._GoStandalone()
6014     self._GoReconnect(True)
6015     self._WaitUntilSync()
6016
6017     self.feedback_fn("* preparing %s to accept the instance" % target_node)
6018     result = self.rpc.call_accept_instance(target_node,
6019                                            instance,
6020                                            migration_info,
6021                                            self.nodes_ip[target_node])
6022
6023     msg = result.fail_msg
6024     if msg:
6025       logging.error("Instance pre-migration failed, trying to revert"
6026                     " disk status: %s", msg)
6027       self.feedback_fn("Pre-migration failed, aborting")
6028       self._AbortMigration()
6029       self._RevertDiskStatus()
6030       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6031                                (instance.name, msg))
6032
6033     self.feedback_fn("* migrating instance to %s" % target_node)
6034     time.sleep(10)
6035     result = self.rpc.call_instance_migrate(source_node, instance,
6036                                             self.nodes_ip[target_node],
6037                                             self.live)
6038     msg = result.fail_msg
6039     if msg:
6040       logging.error("Instance migration failed, trying to revert"
6041                     " disk status: %s", msg)
6042       self.feedback_fn("Migration failed, aborting")
6043       self._AbortMigration()
6044       self._RevertDiskStatus()
6045       raise errors.OpExecError("Could not migrate instance %s: %s" %
6046                                (instance.name, msg))
6047     time.sleep(10)
6048
6049     instance.primary_node = target_node
6050     # distribute new instance config to the other nodes
6051     self.cfg.Update(instance, self.feedback_fn)
6052
6053     result = self.rpc.call_finalize_migration(target_node,
6054                                               instance,
6055                                               migration_info,
6056                                               True)
6057     msg = result.fail_msg
6058     if msg:
6059       logging.error("Instance migration succeeded, but finalization failed:"
6060                     " %s", msg)
6061       raise errors.OpExecError("Could not finalize instance migration: %s" %
6062                                msg)
6063
6064     self._EnsureSecondary(source_node)
6065     self._WaitUntilSync()
6066     self._GoStandalone()
6067     self._GoReconnect(False)
6068     self._WaitUntilSync()
6069
6070     self.feedback_fn("* done")
6071
6072   def Exec(self, feedback_fn):
6073     """Perform the migration.
6074
6075     """
6076     feedback_fn("Migrating instance %s" % self.instance.name)
6077
6078     self.feedback_fn = feedback_fn
6079
6080     self.source_node = self.instance.primary_node
6081     self.target_node = self.instance.secondary_nodes[0]
6082     self.all_nodes = [self.source_node, self.target_node]
6083     self.nodes_ip = {
6084       self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6085       self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6086       }
6087
6088     if self.cleanup:
6089       return self._ExecCleanup()
6090     else:
6091       return self._ExecMigration()
6092
6093
6094 def _CreateBlockDev(lu, node, instance, device, force_create,
6095                     info, force_open):
6096   """Create a tree of block devices on a given node.
6097
6098   If this device type has to be created on secondaries, create it and
6099   all its children.
6100
6101   If not, just recurse to children keeping the same 'force' value.
6102
6103   @param lu: the lu on whose behalf we execute
6104   @param node: the node on which to create the device
6105   @type instance: L{objects.Instance}
6106   @param instance: the instance which owns the device
6107   @type device: L{objects.Disk}
6108   @param device: the device to create
6109   @type force_create: boolean
6110   @param force_create: whether to force creation of this device; this
6111       will be change to True whenever we find a device which has
6112       CreateOnSecondary() attribute
6113   @param info: the extra 'metadata' we should attach to the device
6114       (this will be represented as a LVM tag)
6115   @type force_open: boolean
6116   @param force_open: this parameter will be passes to the
6117       L{backend.BlockdevCreate} function where it specifies
6118       whether we run on primary or not, and it affects both
6119       the child assembly and the device own Open() execution
6120
6121   """
6122   if device.CreateOnSecondary():
6123     force_create = True
6124
6125   if device.children:
6126     for child in device.children:
6127       _CreateBlockDev(lu, node, instance, child, force_create,
6128                       info, force_open)
6129
6130   if not force_create:
6131     return
6132
6133   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6134
6135
6136 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6137   """Create a single block device on a given node.
6138
6139   This will not recurse over children of the device, so they must be
6140   created in advance.
6141
6142   @param lu: the lu on whose behalf we execute
6143   @param node: the node on which to create the device
6144   @type instance: L{objects.Instance}
6145   @param instance: the instance which owns the device
6146   @type device: L{objects.Disk}
6147   @param device: the device to create
6148   @param info: the extra 'metadata' we should attach to the device
6149       (this will be represented as a LVM tag)
6150   @type force_open: boolean
6151   @param force_open: this parameter will be passes to the
6152       L{backend.BlockdevCreate} function where it specifies
6153       whether we run on primary or not, and it affects both
6154       the child assembly and the device own Open() execution
6155
6156   """
6157   lu.cfg.SetDiskID(device, node)
6158   result = lu.rpc.call_blockdev_create(node, device, device.size,
6159                                        instance.name, force_open, info)
6160   result.Raise("Can't create block device %s on"
6161                " node %s for instance %s" % (device, node, instance.name))
6162   if device.physical_id is None:
6163     device.physical_id = result.payload
6164
6165
6166 def _GenerateUniqueNames(lu, exts):
6167   """Generate a suitable LV name.
6168
6169   This will generate a logical volume name for the given instance.
6170
6171   """
6172   results = []
6173   for val in exts:
6174     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6175     results.append("%s%s" % (new_id, val))
6176   return results
6177
6178
6179 def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
6180                          p_minor, s_minor):
6181   """Generate a drbd8 device complete with its children.
6182
6183   """
6184   port = lu.cfg.AllocatePort()
6185   vgname = lu.cfg.GetVGName()
6186   shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6187   dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6188                           logical_id=(vgname, names[0]))
6189   dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6190                           logical_id=(vgname, names[1]))
6191   drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6192                           logical_id=(primary, secondary, port,
6193                                       p_minor, s_minor,
6194                                       shared_secret),
6195                           children=[dev_data, dev_meta],
6196                           iv_name=iv_name)
6197   return drbd_dev
6198
6199
6200 def _GenerateDiskTemplate(lu, template_name,
6201                           instance_name, primary_node,
6202                           secondary_nodes, disk_info,
6203                           file_storage_dir, file_driver,
6204                           base_index):
6205   """Generate the entire disk layout for a given template type.
6206
6207   """
6208   #TODO: compute space requirements
6209
6210   vgname = lu.cfg.GetVGName()
6211   disk_count = len(disk_info)
6212   disks = []
6213   if template_name == constants.DT_DISKLESS:
6214     pass
6215   elif template_name == constants.DT_PLAIN:
6216     if len(secondary_nodes) != 0:
6217       raise errors.ProgrammerError("Wrong template configuration")
6218
6219     names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6220                                       for i in range(disk_count)])
6221     for idx, disk in enumerate(disk_info):
6222       disk_index = idx + base_index
6223       disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6224                               logical_id=(vgname, names[idx]),
6225                               iv_name="disk/%d" % disk_index,
6226                               mode=disk["mode"])
6227       disks.append(disk_dev)
6228   elif template_name == constants.DT_DRBD8:
6229     if len(secondary_nodes) != 1:
6230       raise errors.ProgrammerError("Wrong template configuration")
6231     remote_node = secondary_nodes[0]
6232     minors = lu.cfg.AllocateDRBDMinor(
6233       [primary_node, remote_node] * len(disk_info), instance_name)
6234
6235     names = []
6236     for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6237                                                for i in range(disk_count)]):
6238       names.append(lv_prefix + "_data")
6239       names.append(lv_prefix + "_meta")
6240     for idx, disk in enumerate(disk_info):
6241       disk_index = idx + base_index
6242       disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6243                                       disk["size"], names[idx*2:idx*2+2],
6244                                       "disk/%d" % disk_index,
6245                                       minors[idx*2], minors[idx*2+1])
6246       disk_dev.mode = disk["mode"]
6247       disks.append(disk_dev)
6248   elif template_name == constants.DT_FILE:
6249     if len(secondary_nodes) != 0:
6250       raise errors.ProgrammerError("Wrong template configuration")
6251
6252     _RequireFileStorage()
6253
6254     for idx, disk in enumerate(disk_info):
6255       disk_index = idx + base_index
6256       disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6257                               iv_name="disk/%d" % disk_index,
6258                               logical_id=(file_driver,
6259                                           "%s/disk%d" % (file_storage_dir,
6260                                                          disk_index)),
6261                               mode=disk["mode"])
6262       disks.append(disk_dev)
6263   else:
6264     raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
6265   return disks
6266
6267
6268 def _GetInstanceInfoText(instance):
6269   """Compute that text that should be added to the disk's metadata.
6270
6271   """
6272   return "originstname+%s" % instance.name
6273
6274
6275 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6276   """Create all disks for an instance.
6277
6278   This abstracts away some work from AddInstance.
6279
6280   @type lu: L{LogicalUnit}
6281   @param lu: the logical unit on whose behalf we execute
6282   @type instance: L{objects.Instance}
6283   @param instance: the instance whose disks we should create
6284   @type to_skip: list
6285   @param to_skip: list of indices to skip
6286   @type target_node: string
6287   @param target_node: if passed, overrides the target node for creation
6288   @rtype: boolean
6289   @return: the success of the creation
6290
6291   """
6292   info = _GetInstanceInfoText(instance)
6293   if target_node is None:
6294     pnode = instance.primary_node
6295     all_nodes = instance.all_nodes
6296   else:
6297     pnode = target_node
6298     all_nodes = [pnode]
6299
6300   if instance.disk_template == constants.DT_FILE:
6301     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6302     result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6303
6304     result.Raise("Failed to create directory '%s' on"
6305                  " node %s" % (file_storage_dir, pnode))
6306
6307   # Note: this needs to be kept in sync with adding of disks in
6308   # LUSetInstanceParams
6309   for idx, device in enumerate(instance.disks):
6310     if to_skip and idx in to_skip:
6311       continue
6312     logging.info("Creating volume %s for instance %s",
6313                  device.iv_name, instance.name)
6314     #HARDCODE
6315     for node in all_nodes:
6316       f_create = node == pnode
6317       _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6318
6319
6320 def _RemoveDisks(lu, instance, target_node=None):
6321   """Remove all disks for an instance.
6322
6323   This abstracts away some work from `AddInstance()` and
6324   `RemoveInstance()`. Note that in case some of the devices couldn't
6325   be removed, the removal will continue with the other ones (compare
6326   with `_CreateDisks()`).
6327
6328   @type lu: L{LogicalUnit}
6329   @param lu: the logical unit on whose behalf we execute
6330   @type instance: L{objects.Instance}
6331   @param instance: the instance whose disks we should remove
6332   @type target_node: string
6333   @param target_node: used to override the node on which to remove the disks
6334   @rtype: boolean
6335   @return: the success of the removal
6336
6337   """
6338   logging.info("Removing block devices for instance %s", instance.name)
6339
6340   all_result = True
6341   for device in instance.disks:
6342     if target_node:
6343       edata = [(target_node, device)]
6344     else:
6345       edata = device.ComputeNodeTree(instance.primary_node)
6346     for node, disk in edata:
6347       lu.cfg.SetDiskID(disk, node)
6348       msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6349       if msg:
6350         lu.LogWarning("Could not remove block device %s on node %s,"
6351                       " continuing anyway: %s", device.iv_name, node, msg)
6352         all_result = False
6353
6354   if instance.disk_template == constants.DT_FILE:
6355     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6356     if target_node:
6357       tgt = target_node
6358     else:
6359       tgt = instance.primary_node
6360     result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6361     if result.fail_msg:
6362       lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6363                     file_storage_dir, instance.primary_node, result.fail_msg)
6364       all_result = False
6365
6366   return all_result
6367
6368
6369 def _ComputeDiskSize(disk_template, disks):
6370   """Compute disk size requirements in the volume group
6371
6372   """
6373   # Required free disk space as a function of disk and swap space
6374   req_size_dict = {
6375     constants.DT_DISKLESS: None,
6376     constants.DT_PLAIN: sum(d["size"] for d in disks),
6377     # 128 MB are added for drbd metadata for each disk
6378     constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6379     constants.DT_FILE: None,
6380   }
6381
6382   if disk_template not in req_size_dict:
6383     raise errors.ProgrammerError("Disk template '%s' size requirement"
6384                                  " is unknown" %  disk_template)
6385
6386   return req_size_dict[disk_template]
6387
6388
6389 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6390   """Hypervisor parameter validation.
6391
6392   This function abstract the hypervisor parameter validation to be
6393   used in both instance create and instance modify.
6394
6395   @type lu: L{LogicalUnit}
6396   @param lu: the logical unit for which we check
6397   @type nodenames: list
6398   @param nodenames: the list of nodes on which we should check
6399   @type hvname: string
6400   @param hvname: the name of the hypervisor we should use
6401   @type hvparams: dict
6402   @param hvparams: the parameters which we need to check
6403   @raise errors.OpPrereqError: if the parameters are not valid
6404
6405   """
6406   hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6407                                                   hvname,
6408                                                   hvparams)
6409   for node in nodenames:
6410     info = hvinfo[node]
6411     if info.offline:
6412       continue
6413     info.Raise("Hypervisor parameter validation failed on node %s" % node)
6414
6415
6416 def _CheckOSParams(lu, required, nodenames, osname, osparams):
6417   """OS parameters validation.
6418
6419   @type lu: L{LogicalUnit}
6420   @param lu: the logical unit for which we check
6421   @type required: boolean
6422   @param required: whether the validation should fail if the OS is not
6423       found
6424   @type nodenames: list
6425   @param nodenames: the list of nodes on which we should check
6426   @type osname: string
6427   @param osname: the name of the hypervisor we should use
6428   @type osparams: dict
6429   @param osparams: the parameters which we need to check
6430   @raise errors.OpPrereqError: if the parameters are not valid
6431
6432   """
6433   result = lu.rpc.call_os_validate(required, nodenames, osname,
6434                                    [constants.OS_VALIDATE_PARAMETERS],
6435                                    osparams)
6436   for node, nres in result.items():
6437     # we don't check for offline cases since this should be run only
6438     # against the master node and/or an instance's nodes
6439     nres.Raise("OS Parameters validation failed on node %s" % node)
6440     if not nres.payload:
6441       lu.LogInfo("OS %s not found on node %s, validation skipped",
6442                  osname, node)
6443
6444
6445 class LUCreateInstance(LogicalUnit):
6446   """Create an instance.
6447
6448   """
6449   HPATH = "instance-add"
6450   HTYPE = constants.HTYPE_INSTANCE
6451   _OP_PARAMS = [
6452     _PInstanceName,
6453     ("mode", ht.NoDefault, ht.TElemOf(constants.INSTANCE_CREATE_MODES)),
6454     ("start", True, ht.TBool),
6455     ("wait_for_sync", True, ht.TBool),
6456     ("ip_check", True, ht.TBool),
6457     ("name_check", True, ht.TBool),
6458     ("disks", ht.NoDefault, ht.TListOf(ht.TDict)),
6459     ("nics", ht.NoDefault, ht.TListOf(ht.TDict)),
6460     ("hvparams", ht.EmptyDict, ht.TDict),
6461     ("beparams", ht.EmptyDict, ht.TDict),
6462     ("osparams", ht.EmptyDict, ht.TDict),
6463     ("no_install", None, ht.TMaybeBool),
6464     ("os_type", None, ht.TMaybeString),
6465     ("force_variant", False, ht.TBool),
6466     ("source_handshake", None, ht.TOr(ht.TList, ht.TNone)),
6467     ("source_x509_ca", None, ht.TMaybeString),
6468     ("source_instance_name", None, ht.TMaybeString),
6469     ("src_node", None, ht.TMaybeString),
6470     ("src_path", None, ht.TMaybeString),
6471     ("pnode", None, ht.TMaybeString),
6472     ("snode", None, ht.TMaybeString),
6473     ("iallocator", None, ht.TMaybeString),
6474     ("hypervisor", None, ht.TMaybeString),
6475     ("disk_template", ht.NoDefault, _CheckDiskTemplate),
6476     ("identify_defaults", False, ht.TBool),
6477     ("file_driver", None, ht.TOr(ht.TNone, ht.TElemOf(constants.FILE_DRIVER))),
6478     ("file_storage_dir", None, ht.TMaybeString),
6479     ]
6480   REQ_BGL = False
6481
6482   def CheckArguments(self):
6483     """Check arguments.
6484
6485     """
6486     # do not require name_check to ease forward/backward compatibility
6487     # for tools
6488     if self.op.no_install and self.op.start:
6489       self.LogInfo("No-installation mode selected, disabling startup")
6490       self.op.start = False
6491     # validate/normalize the instance name
6492     self.op.instance_name = \
6493       netutils.Hostname.GetNormalizedName(self.op.instance_name)
6494
6495     if self.op.ip_check and not self.op.name_check:
6496       # TODO: make the ip check more flexible and not depend on the name check
6497       raise errors.OpPrereqError("Cannot do ip check without a name check",
6498                                  errors.ECODE_INVAL)
6499
6500     # check nics' parameter names
6501     for nic in self.op.nics:
6502       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6503
6504     # check disks. parameter names and consistent adopt/no-adopt strategy
6505     has_adopt = has_no_adopt = False
6506     for disk in self.op.disks:
6507       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6508       if "adopt" in disk:
6509         has_adopt = True
6510       else:
6511         has_no_adopt = True
6512     if has_adopt and has_no_adopt:
6513       raise errors.OpPrereqError("Either all disks are adopted or none is",
6514                                  errors.ECODE_INVAL)
6515     if has_adopt:
6516       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
6517         raise errors.OpPrereqError("Disk adoption is not supported for the"
6518                                    " '%s' disk template" %
6519                                    self.op.disk_template,
6520                                    errors.ECODE_INVAL)
6521       if self.op.iallocator is not None:
6522         raise errors.OpPrereqError("Disk adoption not allowed with an"
6523                                    " iallocator script", errors.ECODE_INVAL)
6524       if self.op.mode == constants.INSTANCE_IMPORT:
6525         raise errors.OpPrereqError("Disk adoption not allowed for"
6526                                    " instance import", errors.ECODE_INVAL)
6527
6528     self.adopt_disks = has_adopt
6529
6530     # instance name verification
6531     if self.op.name_check:
6532       self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
6533       self.op.instance_name = self.hostname1.name
6534       # used in CheckPrereq for ip ping check
6535       self.check_ip = self.hostname1.ip
6536     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6537       raise errors.OpPrereqError("Remote imports require names to be checked" %
6538                                  errors.ECODE_INVAL)
6539     else:
6540       self.check_ip = None
6541
6542     # file storage checks
6543     if (self.op.file_driver and
6544         not self.op.file_driver in constants.FILE_DRIVER):
6545       raise errors.OpPrereqError("Invalid file driver name '%s'" %
6546                                  self.op.file_driver, errors.ECODE_INVAL)
6547
6548     if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6549       raise errors.OpPrereqError("File storage directory path not absolute",
6550                                  errors.ECODE_INVAL)
6551
6552     ### Node/iallocator related checks
6553     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
6554
6555     if self.op.pnode is not None:
6556       if self.op.disk_template in constants.DTS_NET_MIRROR:
6557         if self.op.snode is None:
6558           raise errors.OpPrereqError("The networked disk templates need"
6559                                      " a mirror node", errors.ECODE_INVAL)
6560       elif self.op.snode:
6561         self.LogWarning("Secondary node will be ignored on non-mirrored disk"
6562                         " template")
6563         self.op.snode = None
6564
6565     self._cds = _GetClusterDomainSecret()
6566
6567     if self.op.mode == constants.INSTANCE_IMPORT:
6568       # On import force_variant must be True, because if we forced it at
6569       # initial install, our only chance when importing it back is that it
6570       # works again!
6571       self.op.force_variant = True
6572
6573       if self.op.no_install:
6574         self.LogInfo("No-installation mode has no effect during import")
6575
6576     elif self.op.mode == constants.INSTANCE_CREATE:
6577       if self.op.os_type is None:
6578         raise errors.OpPrereqError("No guest OS specified",
6579                                    errors.ECODE_INVAL)
6580       if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
6581         raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
6582                                    " installation" % self.op.os_type,
6583                                    errors.ECODE_STATE)
6584       if self.op.disk_template is None:
6585         raise errors.OpPrereqError("No disk template specified",
6586                                    errors.ECODE_INVAL)
6587
6588     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6589       # Check handshake to ensure both clusters have the same domain secret
6590       src_handshake = self.op.source_handshake
6591       if not src_handshake:
6592         raise errors.OpPrereqError("Missing source handshake",
6593                                    errors.ECODE_INVAL)
6594
6595       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6596                                                            src_handshake)
6597       if errmsg:
6598         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6599                                    errors.ECODE_INVAL)
6600
6601       # Load and check source CA
6602       self.source_x509_ca_pem = self.op.source_x509_ca
6603       if not self.source_x509_ca_pem:
6604         raise errors.OpPrereqError("Missing source X509 CA",
6605                                    errors.ECODE_INVAL)
6606
6607       try:
6608         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6609                                                     self._cds)
6610       except OpenSSL.crypto.Error, err:
6611         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6612                                    (err, ), errors.ECODE_INVAL)
6613
6614       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6615       if errcode is not None:
6616         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6617                                    errors.ECODE_INVAL)
6618
6619       self.source_x509_ca = cert
6620
6621       src_instance_name = self.op.source_instance_name
6622       if not src_instance_name:
6623         raise errors.OpPrereqError("Missing source instance name",
6624                                    errors.ECODE_INVAL)
6625
6626       self.source_instance_name = \
6627           netutils.GetHostname(name=src_instance_name).name
6628
6629     else:
6630       raise errors.OpPrereqError("Invalid instance creation mode %r" %
6631                                  self.op.mode, errors.ECODE_INVAL)
6632
6633   def ExpandNames(self):
6634     """ExpandNames for CreateInstance.
6635
6636     Figure out the right locks for instance creation.
6637
6638     """
6639     self.needed_locks = {}
6640
6641     instance_name = self.op.instance_name
6642     # this is just a preventive check, but someone might still add this
6643     # instance in the meantime, and creation will fail at lock-add time
6644     if instance_name in self.cfg.GetInstanceList():
6645       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6646                                  instance_name, errors.ECODE_EXISTS)
6647
6648     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6649
6650     if self.op.iallocator:
6651       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6652     else:
6653       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6654       nodelist = [self.op.pnode]
6655       if self.op.snode is not None:
6656         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6657         nodelist.append(self.op.snode)
6658       self.needed_locks[locking.LEVEL_NODE] = nodelist
6659
6660     # in case of import lock the source node too
6661     if self.op.mode == constants.INSTANCE_IMPORT:
6662       src_node = self.op.src_node
6663       src_path = self.op.src_path
6664
6665       if src_path is None:
6666         self.op.src_path = src_path = self.op.instance_name
6667
6668       if src_node is None:
6669         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6670         self.op.src_node = None
6671         if os.path.isabs(src_path):
6672           raise errors.OpPrereqError("Importing an instance from an absolute"
6673                                      " path requires a source node option.",
6674                                      errors.ECODE_INVAL)
6675       else:
6676         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6677         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6678           self.needed_locks[locking.LEVEL_NODE].append(src_node)
6679         if not os.path.isabs(src_path):
6680           self.op.src_path = src_path = \
6681             utils.PathJoin(constants.EXPORT_DIR, src_path)
6682
6683   def _RunAllocator(self):
6684     """Run the allocator based on input opcode.
6685
6686     """
6687     nics = [n.ToDict() for n in self.nics]
6688     ial = IAllocator(self.cfg, self.rpc,
6689                      mode=constants.IALLOCATOR_MODE_ALLOC,
6690                      name=self.op.instance_name,
6691                      disk_template=self.op.disk_template,
6692                      tags=[],
6693                      os=self.op.os_type,
6694                      vcpus=self.be_full[constants.BE_VCPUS],
6695                      mem_size=self.be_full[constants.BE_MEMORY],
6696                      disks=self.disks,
6697                      nics=nics,
6698                      hypervisor=self.op.hypervisor,
6699                      )
6700
6701     ial.Run(self.op.iallocator)
6702
6703     if not ial.success:
6704       raise errors.OpPrereqError("Can't compute nodes using"
6705                                  " iallocator '%s': %s" %
6706                                  (self.op.iallocator, ial.info),
6707                                  errors.ECODE_NORES)
6708     if len(ial.result) != ial.required_nodes:
6709       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6710                                  " of nodes (%s), required %s" %
6711                                  (self.op.iallocator, len(ial.result),
6712                                   ial.required_nodes), errors.ECODE_FAULT)
6713     self.op.pnode = ial.result[0]
6714     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6715                  self.op.instance_name, self.op.iallocator,
6716                  utils.CommaJoin(ial.result))
6717     if ial.required_nodes == 2:
6718       self.op.snode = ial.result[1]
6719
6720   def BuildHooksEnv(self):
6721     """Build hooks env.
6722
6723     This runs on master, primary and secondary nodes of the instance.
6724
6725     """
6726     env = {
6727       "ADD_MODE": self.op.mode,
6728       }
6729     if self.op.mode == constants.INSTANCE_IMPORT:
6730       env["SRC_NODE"] = self.op.src_node
6731       env["SRC_PATH"] = self.op.src_path
6732       env["SRC_IMAGES"] = self.src_images
6733
6734     env.update(_BuildInstanceHookEnv(
6735       name=self.op.instance_name,
6736       primary_node=self.op.pnode,
6737       secondary_nodes=self.secondaries,
6738       status=self.op.start,
6739       os_type=self.op.os_type,
6740       memory=self.be_full[constants.BE_MEMORY],
6741       vcpus=self.be_full[constants.BE_VCPUS],
6742       nics=_NICListToTuple(self, self.nics),
6743       disk_template=self.op.disk_template,
6744       disks=[(d["size"], d["mode"]) for d in self.disks],
6745       bep=self.be_full,
6746       hvp=self.hv_full,
6747       hypervisor_name=self.op.hypervisor,
6748     ))
6749
6750     nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6751           self.secondaries)
6752     return env, nl, nl
6753
6754   def _ReadExportInfo(self):
6755     """Reads the export information from disk.
6756
6757     It will override the opcode source node and path with the actual
6758     information, if these two were not specified before.
6759
6760     @return: the export information
6761
6762     """
6763     assert self.op.mode == constants.INSTANCE_IMPORT
6764
6765     src_node = self.op.src_node
6766     src_path = self.op.src_path
6767
6768     if src_node is None:
6769       locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6770       exp_list = self.rpc.call_export_list(locked_nodes)
6771       found = False
6772       for node in exp_list:
6773         if exp_list[node].fail_msg:
6774           continue
6775         if src_path in exp_list[node].payload:
6776           found = True
6777           self.op.src_node = src_node = node
6778           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6779                                                        src_path)
6780           break
6781       if not found:
6782         raise errors.OpPrereqError("No export found for relative path %s" %
6783                                     src_path, errors.ECODE_INVAL)
6784
6785     _CheckNodeOnline(self, src_node)
6786     result = self.rpc.call_export_info(src_node, src_path)
6787     result.Raise("No export or invalid export found in dir %s" % src_path)
6788
6789     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6790     if not export_info.has_section(constants.INISECT_EXP):
6791       raise errors.ProgrammerError("Corrupted export config",
6792                                    errors.ECODE_ENVIRON)
6793
6794     ei_version = export_info.get(constants.INISECT_EXP, "version")
6795     if (int(ei_version) != constants.EXPORT_VERSION):
6796       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6797                                  (ei_version, constants.EXPORT_VERSION),
6798                                  errors.ECODE_ENVIRON)
6799     return export_info
6800
6801   def _ReadExportParams(self, einfo):
6802     """Use export parameters as defaults.
6803
6804     In case the opcode doesn't specify (as in override) some instance
6805     parameters, then try to use them from the export information, if
6806     that declares them.
6807
6808     """
6809     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6810
6811     if self.op.disk_template is None:
6812       if einfo.has_option(constants.INISECT_INS, "disk_template"):
6813         self.op.disk_template = einfo.get(constants.INISECT_INS,
6814                                           "disk_template")
6815       else:
6816         raise errors.OpPrereqError("No disk template specified and the export"
6817                                    " is missing the disk_template information",
6818                                    errors.ECODE_INVAL)
6819
6820     if not self.op.disks:
6821       if einfo.has_option(constants.INISECT_INS, "disk_count"):
6822         disks = []
6823         # TODO: import the disk iv_name too
6824         for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6825           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6826           disks.append({"size": disk_sz})
6827         self.op.disks = disks
6828       else:
6829         raise errors.OpPrereqError("No disk info specified and the export"
6830                                    " is missing the disk information",
6831                                    errors.ECODE_INVAL)
6832
6833     if (not self.op.nics and
6834         einfo.has_option(constants.INISECT_INS, "nic_count")):
6835       nics = []
6836       for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6837         ndict = {}
6838         for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6839           v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6840           ndict[name] = v
6841         nics.append(ndict)
6842       self.op.nics = nics
6843
6844     if (self.op.hypervisor is None and
6845         einfo.has_option(constants.INISECT_INS, "hypervisor")):
6846       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6847     if einfo.has_section(constants.INISECT_HYP):
6848       # use the export parameters but do not override the ones
6849       # specified by the user
6850       for name, value in einfo.items(constants.INISECT_HYP):
6851         if name not in self.op.hvparams:
6852           self.op.hvparams[name] = value
6853
6854     if einfo.has_section(constants.INISECT_BEP):
6855       # use the parameters, without overriding
6856       for name, value in einfo.items(constants.INISECT_BEP):
6857         if name not in self.op.beparams:
6858           self.op.beparams[name] = value
6859     else:
6860       # try to read the parameters old style, from the main section
6861       for name in constants.BES_PARAMETERS:
6862         if (name not in self.op.beparams and
6863             einfo.has_option(constants.INISECT_INS, name)):
6864           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6865
6866     if einfo.has_section(constants.INISECT_OSP):
6867       # use the parameters, without overriding
6868       for name, value in einfo.items(constants.INISECT_OSP):
6869         if name not in self.op.osparams:
6870           self.op.osparams[name] = value
6871
6872   def _RevertToDefaults(self, cluster):
6873     """Revert the instance parameters to the default values.
6874
6875     """
6876     # hvparams
6877     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
6878     for name in self.op.hvparams.keys():
6879       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6880         del self.op.hvparams[name]
6881     # beparams
6882     be_defs = cluster.SimpleFillBE({})
6883     for name in self.op.beparams.keys():
6884       if name in be_defs and be_defs[name] == self.op.beparams[name]:
6885         del self.op.beparams[name]
6886     # nic params
6887     nic_defs = cluster.SimpleFillNIC({})
6888     for nic in self.op.nics:
6889       for name in constants.NICS_PARAMETERS:
6890         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
6891           del nic[name]
6892     # osparams
6893     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
6894     for name in self.op.osparams.keys():
6895       if name in os_defs and os_defs[name] == self.op.osparams[name]:
6896         del self.op.osparams[name]
6897
6898   def CheckPrereq(self):
6899     """Check prerequisites.
6900
6901     """
6902     if self.op.mode == constants.INSTANCE_IMPORT:
6903       export_info = self._ReadExportInfo()
6904       self._ReadExportParams(export_info)
6905
6906     _CheckDiskTemplate(self.op.disk_template)
6907
6908     if (not self.cfg.GetVGName() and
6909         self.op.disk_template not in constants.DTS_NOT_LVM):
6910       raise errors.OpPrereqError("Cluster does not support lvm-based"
6911                                  " instances", errors.ECODE_STATE)
6912
6913     if self.op.hypervisor is None:
6914       self.op.hypervisor = self.cfg.GetHypervisorType()
6915
6916     cluster = self.cfg.GetClusterInfo()
6917     enabled_hvs = cluster.enabled_hypervisors
6918     if self.op.hypervisor not in enabled_hvs:
6919       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6920                                  " cluster (%s)" % (self.op.hypervisor,
6921                                   ",".join(enabled_hvs)),
6922                                  errors.ECODE_STATE)
6923
6924     # check hypervisor parameter syntax (locally)
6925     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6926     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
6927                                       self.op.hvparams)
6928     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6929     hv_type.CheckParameterSyntax(filled_hvp)
6930     self.hv_full = filled_hvp
6931     # check that we don't specify global parameters on an instance
6932     _CheckGlobalHvParams(self.op.hvparams)
6933
6934     # fill and remember the beparams dict
6935     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6936     self.be_full = cluster.SimpleFillBE(self.op.beparams)
6937
6938     # build os parameters
6939     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
6940
6941     # now that hvp/bep are in final format, let's reset to defaults,
6942     # if told to do so
6943     if self.op.identify_defaults:
6944       self._RevertToDefaults(cluster)
6945
6946     # NIC buildup
6947     self.nics = []
6948     for idx, nic in enumerate(self.op.nics):
6949       nic_mode_req = nic.get("mode", None)
6950       nic_mode = nic_mode_req
6951       if nic_mode is None:
6952         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6953
6954       # in routed mode, for the first nic, the default ip is 'auto'
6955       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6956         default_ip_mode = constants.VALUE_AUTO
6957       else:
6958         default_ip_mode = constants.VALUE_NONE
6959
6960       # ip validity checks
6961       ip = nic.get("ip", default_ip_mode)
6962       if ip is None or ip.lower() == constants.VALUE_NONE:
6963         nic_ip = None
6964       elif ip.lower() == constants.VALUE_AUTO:
6965         if not self.op.name_check:
6966           raise errors.OpPrereqError("IP address set to auto but name checks"
6967                                      " have been skipped",
6968                                      errors.ECODE_INVAL)
6969         nic_ip = self.hostname1.ip
6970       else:
6971         if not netutils.IPAddress.IsValid(ip):
6972           raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
6973                                      errors.ECODE_INVAL)
6974         nic_ip = ip
6975
6976       # TODO: check the ip address for uniqueness
6977       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6978         raise errors.OpPrereqError("Routed nic mode requires an ip address",
6979                                    errors.ECODE_INVAL)
6980
6981       # MAC address verification
6982       mac = nic.get("mac", constants.VALUE_AUTO)
6983       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6984         mac = utils.NormalizeAndValidateMac(mac)
6985
6986         try:
6987           self.cfg.ReserveMAC(mac, self.proc.GetECId())
6988         except errors.ReservationError:
6989           raise errors.OpPrereqError("MAC address %s already in use"
6990                                      " in cluster" % mac,
6991                                      errors.ECODE_NOTUNIQUE)
6992
6993       # bridge verification
6994       bridge = nic.get("bridge", None)
6995       link = nic.get("link", None)
6996       if bridge and link:
6997         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6998                                    " at the same time", errors.ECODE_INVAL)
6999       elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
7000         raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
7001                                    errors.ECODE_INVAL)
7002       elif bridge:
7003         link = bridge
7004
7005       nicparams = {}
7006       if nic_mode_req:
7007         nicparams[constants.NIC_MODE] = nic_mode_req
7008       if link:
7009         nicparams[constants.NIC_LINK] = link
7010
7011       check_params = cluster.SimpleFillNIC(nicparams)
7012       objects.NIC.CheckParameterSyntax(check_params)
7013       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
7014
7015     # disk checks/pre-build
7016     self.disks = []
7017     for disk in self.op.disks:
7018       mode = disk.get("mode", constants.DISK_RDWR)
7019       if mode not in constants.DISK_ACCESS_SET:
7020         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
7021                                    mode, errors.ECODE_INVAL)
7022       size = disk.get("size", None)
7023       if size is None:
7024         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
7025       try:
7026         size = int(size)
7027       except (TypeError, ValueError):
7028         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
7029                                    errors.ECODE_INVAL)
7030       new_disk = {"size": size, "mode": mode}
7031       if "adopt" in disk:
7032         new_disk["adopt"] = disk["adopt"]
7033       self.disks.append(new_disk)
7034
7035     if self.op.mode == constants.INSTANCE_IMPORT:
7036
7037       # Check that the new instance doesn't have less disks than the export
7038       instance_disks = len(self.disks)
7039       export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
7040       if instance_disks < export_disks:
7041         raise errors.OpPrereqError("Not enough disks to import."
7042                                    " (instance: %d, export: %d)" %
7043                                    (instance_disks, export_disks),
7044                                    errors.ECODE_INVAL)
7045
7046       disk_images = []
7047       for idx in range(export_disks):
7048         option = 'disk%d_dump' % idx
7049         if export_info.has_option(constants.INISECT_INS, option):
7050           # FIXME: are the old os-es, disk sizes, etc. useful?
7051           export_name = export_info.get(constants.INISECT_INS, option)
7052           image = utils.PathJoin(self.op.src_path, export_name)
7053           disk_images.append(image)
7054         else:
7055           disk_images.append(False)
7056
7057       self.src_images = disk_images
7058
7059       old_name = export_info.get(constants.INISECT_INS, 'name')
7060       try:
7061         exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
7062       except (TypeError, ValueError), err:
7063         raise errors.OpPrereqError("Invalid export file, nic_count is not"
7064                                    " an integer: %s" % str(err),
7065                                    errors.ECODE_STATE)
7066       if self.op.instance_name == old_name:
7067         for idx, nic in enumerate(self.nics):
7068           if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
7069             nic_mac_ini = 'nic%d_mac' % idx
7070             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
7071
7072     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
7073
7074     # ip ping checks (we use the same ip that was resolved in ExpandNames)
7075     if self.op.ip_check:
7076       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
7077         raise errors.OpPrereqError("IP %s of instance %s already in use" %
7078                                    (self.check_ip, self.op.instance_name),
7079                                    errors.ECODE_NOTUNIQUE)
7080
7081     #### mac address generation
7082     # By generating here the mac address both the allocator and the hooks get
7083     # the real final mac address rather than the 'auto' or 'generate' value.
7084     # There is a race condition between the generation and the instance object
7085     # creation, which means that we know the mac is valid now, but we're not
7086     # sure it will be when we actually add the instance. If things go bad
7087     # adding the instance will abort because of a duplicate mac, and the
7088     # creation job will fail.
7089     for nic in self.nics:
7090       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7091         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
7092
7093     #### allocator run
7094
7095     if self.op.iallocator is not None:
7096       self._RunAllocator()
7097
7098     #### node related checks
7099
7100     # check primary node
7101     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
7102     assert self.pnode is not None, \
7103       "Cannot retrieve locked node %s" % self.op.pnode
7104     if pnode.offline:
7105       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
7106                                  pnode.name, errors.ECODE_STATE)
7107     if pnode.drained:
7108       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
7109                                  pnode.name, errors.ECODE_STATE)
7110
7111     self.secondaries = []
7112
7113     # mirror node verification
7114     if self.op.disk_template in constants.DTS_NET_MIRROR:
7115       if self.op.snode == pnode.name:
7116         raise errors.OpPrereqError("The secondary node cannot be the"
7117                                    " primary node.", errors.ECODE_INVAL)
7118       _CheckNodeOnline(self, self.op.snode)
7119       _CheckNodeNotDrained(self, self.op.snode)
7120       self.secondaries.append(self.op.snode)
7121
7122     nodenames = [pnode.name] + self.secondaries
7123
7124     req_size = _ComputeDiskSize(self.op.disk_template,
7125                                 self.disks)
7126
7127     # Check lv size requirements, if not adopting
7128     if req_size is not None and not self.adopt_disks:
7129       _CheckNodesFreeDisk(self, nodenames, req_size)
7130
7131     if self.adopt_disks: # instead, we must check the adoption data
7132       all_lvs = set([i["adopt"] for i in self.disks])
7133       if len(all_lvs) != len(self.disks):
7134         raise errors.OpPrereqError("Duplicate volume names given for adoption",
7135                                    errors.ECODE_INVAL)
7136       for lv_name in all_lvs:
7137         try:
7138           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
7139         except errors.ReservationError:
7140           raise errors.OpPrereqError("LV named %s used by another instance" %
7141                                      lv_name, errors.ECODE_NOTUNIQUE)
7142
7143       node_lvs = self.rpc.call_lv_list([pnode.name],
7144                                        self.cfg.GetVGName())[pnode.name]
7145       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
7146       node_lvs = node_lvs.payload
7147       delta = all_lvs.difference(node_lvs.keys())
7148       if delta:
7149         raise errors.OpPrereqError("Missing logical volume(s): %s" %
7150                                    utils.CommaJoin(delta),
7151                                    errors.ECODE_INVAL)
7152       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
7153       if online_lvs:
7154         raise errors.OpPrereqError("Online logical volumes found, cannot"
7155                                    " adopt: %s" % utils.CommaJoin(online_lvs),
7156                                    errors.ECODE_STATE)
7157       # update the size of disk based on what is found
7158       for dsk in self.disks:
7159         dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
7160
7161     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
7162
7163     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
7164     # check OS parameters (remotely)
7165     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
7166
7167     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
7168
7169     # memory check on primary node
7170     if self.op.start:
7171       _CheckNodeFreeMemory(self, self.pnode.name,
7172                            "creating instance %s" % self.op.instance_name,
7173                            self.be_full[constants.BE_MEMORY],
7174                            self.op.hypervisor)
7175
7176     self.dry_run_result = list(nodenames)
7177
7178   def Exec(self, feedback_fn):
7179     """Create and add the instance to the cluster.
7180
7181     """
7182     instance = self.op.instance_name
7183     pnode_name = self.pnode.name
7184
7185     ht_kind = self.op.hypervisor
7186     if ht_kind in constants.HTS_REQ_PORT:
7187       network_port = self.cfg.AllocatePort()
7188     else:
7189       network_port = None
7190
7191     if constants.ENABLE_FILE_STORAGE:
7192       # this is needed because os.path.join does not accept None arguments
7193       if self.op.file_storage_dir is None:
7194         string_file_storage_dir = ""
7195       else:
7196         string_file_storage_dir = self.op.file_storage_dir
7197
7198       # build the full file storage dir path
7199       file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7200                                         string_file_storage_dir, instance)
7201     else:
7202       file_storage_dir = ""
7203
7204     disks = _GenerateDiskTemplate(self,
7205                                   self.op.disk_template,
7206                                   instance, pnode_name,
7207                                   self.secondaries,
7208                                   self.disks,
7209                                   file_storage_dir,
7210                                   self.op.file_driver,
7211                                   0)
7212
7213     iobj = objects.Instance(name=instance, os=self.op.os_type,
7214                             primary_node=pnode_name,
7215                             nics=self.nics, disks=disks,
7216                             disk_template=self.op.disk_template,
7217                             admin_up=False,
7218                             network_port=network_port,
7219                             beparams=self.op.beparams,
7220                             hvparams=self.op.hvparams,
7221                             hypervisor=self.op.hypervisor,
7222                             osparams=self.op.osparams,
7223                             )
7224
7225     if self.adopt_disks:
7226       # rename LVs to the newly-generated names; we need to construct
7227       # 'fake' LV disks with the old data, plus the new unique_id
7228       tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7229       rename_to = []
7230       for t_dsk, a_dsk in zip (tmp_disks, self.disks):
7231         rename_to.append(t_dsk.logical_id)
7232         t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7233         self.cfg.SetDiskID(t_dsk, pnode_name)
7234       result = self.rpc.call_blockdev_rename(pnode_name,
7235                                              zip(tmp_disks, rename_to))
7236       result.Raise("Failed to rename adoped LVs")
7237     else:
7238       feedback_fn("* creating instance disks...")
7239       try:
7240         _CreateDisks(self, iobj)
7241       except errors.OpExecError:
7242         self.LogWarning("Device creation failed, reverting...")
7243         try:
7244           _RemoveDisks(self, iobj)
7245         finally:
7246           self.cfg.ReleaseDRBDMinors(instance)
7247           raise
7248
7249     feedback_fn("adding instance %s to cluster config" % instance)
7250
7251     self.cfg.AddInstance(iobj, self.proc.GetECId())
7252
7253     # Declare that we don't want to remove the instance lock anymore, as we've
7254     # added the instance to the config
7255     del self.remove_locks[locking.LEVEL_INSTANCE]
7256     # Unlock all the nodes
7257     if self.op.mode == constants.INSTANCE_IMPORT:
7258       nodes_keep = [self.op.src_node]
7259       nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7260                        if node != self.op.src_node]
7261       self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7262       self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7263     else:
7264       self.context.glm.release(locking.LEVEL_NODE)
7265       del self.acquired_locks[locking.LEVEL_NODE]
7266
7267     if self.op.wait_for_sync:
7268       disk_abort = not _WaitForSync(self, iobj)
7269     elif iobj.disk_template in constants.DTS_NET_MIRROR:
7270       # make sure the disks are not degraded (still sync-ing is ok)
7271       time.sleep(15)
7272       feedback_fn("* checking mirrors status")
7273       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7274     else:
7275       disk_abort = False
7276
7277     if disk_abort:
7278       _RemoveDisks(self, iobj)
7279       self.cfg.RemoveInstance(iobj.name)
7280       # Make sure the instance lock gets removed
7281       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7282       raise errors.OpExecError("There are some degraded disks for"
7283                                " this instance")
7284
7285     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7286       if self.op.mode == constants.INSTANCE_CREATE:
7287         if not self.op.no_install:
7288           feedback_fn("* running the instance OS create scripts...")
7289           # FIXME: pass debug option from opcode to backend
7290           result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7291                                                  self.op.debug_level)
7292           result.Raise("Could not add os for instance %s"
7293                        " on node %s" % (instance, pnode_name))
7294
7295       elif self.op.mode == constants.INSTANCE_IMPORT:
7296         feedback_fn("* running the instance OS import scripts...")
7297
7298         transfers = []
7299
7300         for idx, image in enumerate(self.src_images):
7301           if not image:
7302             continue
7303
7304           # FIXME: pass debug option from opcode to backend
7305           dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7306                                              constants.IEIO_FILE, (image, ),
7307                                              constants.IEIO_SCRIPT,
7308                                              (iobj.disks[idx], idx),
7309                                              None)
7310           transfers.append(dt)
7311
7312         import_result = \
7313           masterd.instance.TransferInstanceData(self, feedback_fn,
7314                                                 self.op.src_node, pnode_name,
7315                                                 self.pnode.secondary_ip,
7316                                                 iobj, transfers)
7317         if not compat.all(import_result):
7318           self.LogWarning("Some disks for instance %s on node %s were not"
7319                           " imported successfully" % (instance, pnode_name))
7320
7321       elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7322         feedback_fn("* preparing remote import...")
7323         connect_timeout = constants.RIE_CONNECT_TIMEOUT
7324         timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7325
7326         disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
7327                                                      self.source_x509_ca,
7328                                                      self._cds, timeouts)
7329         if not compat.all(disk_results):
7330           # TODO: Should the instance still be started, even if some disks
7331           # failed to import (valid for local imports, too)?
7332           self.LogWarning("Some disks for instance %s on node %s were not"
7333                           " imported successfully" % (instance, pnode_name))
7334
7335         # Run rename script on newly imported instance
7336         assert iobj.name == instance
7337         feedback_fn("Running rename script for %s" % instance)
7338         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7339                                                    self.source_instance_name,
7340                                                    self.op.debug_level)
7341         if result.fail_msg:
7342           self.LogWarning("Failed to run rename script for %s on node"
7343                           " %s: %s" % (instance, pnode_name, result.fail_msg))
7344
7345       else:
7346         # also checked in the prereq part
7347         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7348                                      % self.op.mode)
7349
7350     if self.op.start:
7351       iobj.admin_up = True
7352       self.cfg.Update(iobj, feedback_fn)
7353       logging.info("Starting instance %s on node %s", instance, pnode_name)
7354       feedback_fn("* starting instance...")
7355       result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7356       result.Raise("Could not start instance")
7357
7358     return list(iobj.all_nodes)
7359
7360
7361 class LUConnectConsole(NoHooksLU):
7362   """Connect to an instance's console.
7363
7364   This is somewhat special in that it returns the command line that
7365   you need to run on the master node in order to connect to the
7366   console.
7367
7368   """
7369   _OP_PARAMS = [
7370     _PInstanceName
7371     ]
7372   REQ_BGL = False
7373
7374   def ExpandNames(self):
7375     self._ExpandAndLockInstance()
7376
7377   def CheckPrereq(self):
7378     """Check prerequisites.
7379
7380     This checks that the instance is in the cluster.
7381
7382     """
7383     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7384     assert self.instance is not None, \
7385       "Cannot retrieve locked instance %s" % self.op.instance_name
7386     _CheckNodeOnline(self, self.instance.primary_node)
7387
7388   def Exec(self, feedback_fn):
7389     """Connect to the console of an instance
7390
7391     """
7392     instance = self.instance
7393     node = instance.primary_node
7394
7395     node_insts = self.rpc.call_instance_list([node],
7396                                              [instance.hypervisor])[node]
7397     node_insts.Raise("Can't get node information from %s" % node)
7398
7399     if instance.name not in node_insts.payload:
7400       if instance.admin_up:
7401         state = "ERROR_down"
7402       else:
7403         state = "ADMIN_down"
7404       raise errors.OpExecError("Instance %s is not running (state %s)" %
7405                                (instance.name, state))
7406
7407     logging.debug("Connecting to console of %s on %s", instance.name, node)
7408
7409     hyper = hypervisor.GetHypervisor(instance.hypervisor)
7410     cluster = self.cfg.GetClusterInfo()
7411     # beparams and hvparams are passed separately, to avoid editing the
7412     # instance and then saving the defaults in the instance itself.
7413     hvparams = cluster.FillHV(instance)
7414     beparams = cluster.FillBE(instance)
7415     console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
7416
7417     # build ssh cmdline
7418     return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
7419
7420
7421 class LUReplaceDisks(LogicalUnit):
7422   """Replace the disks of an instance.
7423
7424   """
7425   HPATH = "mirrors-replace"
7426   HTYPE = constants.HTYPE_INSTANCE
7427   _OP_PARAMS = [
7428     _PInstanceName,
7429     ("mode", ht.NoDefault, ht.TElemOf(constants.REPLACE_MODES)),
7430     ("disks", ht.EmptyList, ht.TListOf(ht.TPositiveInt)),
7431     ("remote_node", None, ht.TMaybeString),
7432     ("iallocator", None, ht.TMaybeString),
7433     ("early_release", False, ht.TBool),
7434     ]
7435   REQ_BGL = False
7436
7437   def CheckArguments(self):
7438     TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7439                                   self.op.iallocator)
7440
7441   def ExpandNames(self):
7442     self._ExpandAndLockInstance()
7443
7444     if self.op.iallocator is not None:
7445       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7446
7447     elif self.op.remote_node is not None:
7448       remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7449       self.op.remote_node = remote_node
7450
7451       # Warning: do not remove the locking of the new secondary here
7452       # unless DRBD8.AddChildren is changed to work in parallel;
7453       # currently it doesn't since parallel invocations of
7454       # FindUnusedMinor will conflict
7455       self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7456       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7457
7458     else:
7459       self.needed_locks[locking.LEVEL_NODE] = []
7460       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7461
7462     self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7463                                    self.op.iallocator, self.op.remote_node,
7464                                    self.op.disks, False, self.op.early_release)
7465
7466     self.tasklets = [self.replacer]
7467
7468   def DeclareLocks(self, level):
7469     # If we're not already locking all nodes in the set we have to declare the
7470     # instance's primary/secondary nodes.
7471     if (level == locking.LEVEL_NODE and
7472         self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7473       self._LockInstancesNodes()
7474
7475   def BuildHooksEnv(self):
7476     """Build hooks env.
7477
7478     This runs on the master, the primary and all the secondaries.
7479
7480     """
7481     instance = self.replacer.instance
7482     env = {
7483       "MODE": self.op.mode,
7484       "NEW_SECONDARY": self.op.remote_node,
7485       "OLD_SECONDARY": instance.secondary_nodes[0],
7486       }
7487     env.update(_BuildInstanceHookEnvByObject(self, instance))
7488     nl = [
7489       self.cfg.GetMasterNode(),
7490       instance.primary_node,
7491       ]
7492     if self.op.remote_node is not None:
7493       nl.append(self.op.remote_node)
7494     return env, nl, nl
7495
7496
7497 class TLReplaceDisks(Tasklet):
7498   """Replaces disks for an instance.
7499
7500   Note: Locking is not within the scope of this class.
7501
7502   """
7503   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7504                disks, delay_iallocator, early_release):
7505     """Initializes this class.
7506
7507     """
7508     Tasklet.__init__(self, lu)
7509
7510     # Parameters
7511     self.instance_name = instance_name
7512     self.mode = mode
7513     self.iallocator_name = iallocator_name
7514     self.remote_node = remote_node
7515     self.disks = disks
7516     self.delay_iallocator = delay_iallocator
7517     self.early_release = early_release
7518
7519     # Runtime data
7520     self.instance = None
7521     self.new_node = None
7522     self.target_node = None
7523     self.other_node = None
7524     self.remote_node_info = None
7525     self.node_secondary_ip = None
7526
7527   @staticmethod
7528   def CheckArguments(mode, remote_node, iallocator):
7529     """Helper function for users of this class.
7530
7531     """
7532     # check for valid parameter combination
7533     if mode == constants.REPLACE_DISK_CHG:
7534       if remote_node is None and iallocator is None:
7535         raise errors.OpPrereqError("When changing the secondary either an"
7536                                    " iallocator script must be used or the"
7537                                    " new node given", errors.ECODE_INVAL)
7538
7539       if remote_node is not None and iallocator is not None:
7540         raise errors.OpPrereqError("Give either the iallocator or the new"
7541                                    " secondary, not both", errors.ECODE_INVAL)
7542
7543     elif remote_node is not None or iallocator is not None:
7544       # Not replacing the secondary
7545       raise errors.OpPrereqError("The iallocator and new node options can"
7546                                  " only be used when changing the"
7547                                  " secondary node", errors.ECODE_INVAL)
7548
7549   @staticmethod
7550   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7551     """Compute a new secondary node using an IAllocator.
7552
7553     """
7554     ial = IAllocator(lu.cfg, lu.rpc,
7555                      mode=constants.IALLOCATOR_MODE_RELOC,
7556                      name=instance_name,
7557                      relocate_from=relocate_from)
7558
7559     ial.Run(iallocator_name)
7560
7561     if not ial.success:
7562       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7563                                  " %s" % (iallocator_name, ial.info),
7564                                  errors.ECODE_NORES)
7565
7566     if len(ial.result) != ial.required_nodes:
7567       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7568                                  " of nodes (%s), required %s" %
7569                                  (iallocator_name,
7570                                   len(ial.result), ial.required_nodes),
7571                                  errors.ECODE_FAULT)
7572
7573     remote_node_name = ial.result[0]
7574
7575     lu.LogInfo("Selected new secondary for instance '%s': %s",
7576                instance_name, remote_node_name)
7577
7578     return remote_node_name
7579
7580   def _FindFaultyDisks(self, node_name):
7581     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7582                                     node_name, True)
7583
7584   def CheckPrereq(self):
7585     """Check prerequisites.
7586
7587     This checks that the instance is in the cluster.
7588
7589     """
7590     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7591     assert instance is not None, \
7592       "Cannot retrieve locked instance %s" % self.instance_name
7593
7594     if instance.disk_template != constants.DT_DRBD8:
7595       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7596                                  " instances", errors.ECODE_INVAL)
7597
7598     if len(instance.secondary_nodes) != 1:
7599       raise errors.OpPrereqError("The instance has a strange layout,"
7600                                  " expected one secondary but found %d" %
7601                                  len(instance.secondary_nodes),
7602                                  errors.ECODE_FAULT)
7603
7604     if not self.delay_iallocator:
7605       self._CheckPrereq2()
7606
7607   def _CheckPrereq2(self):
7608     """Check prerequisites, second part.
7609
7610     This function should always be part of CheckPrereq. It was separated and is
7611     now called from Exec because during node evacuation iallocator was only
7612     called with an unmodified cluster model, not taking planned changes into
7613     account.
7614
7615     """
7616     instance = self.instance
7617     secondary_node = instance.secondary_nodes[0]
7618
7619     if self.iallocator_name is None:
7620       remote_node = self.remote_node
7621     else:
7622       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7623                                        instance.name, instance.secondary_nodes)
7624
7625     if remote_node is not None:
7626       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7627       assert self.remote_node_info is not None, \
7628         "Cannot retrieve locked node %s" % remote_node
7629     else:
7630       self.remote_node_info = None
7631
7632     if remote_node == self.instance.primary_node:
7633       raise errors.OpPrereqError("The specified node is the primary node of"
7634                                  " the instance.", errors.ECODE_INVAL)
7635
7636     if remote_node == secondary_node:
7637       raise errors.OpPrereqError("The specified node is already the"
7638                                  " secondary node of the instance.",
7639                                  errors.ECODE_INVAL)
7640
7641     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7642                                     constants.REPLACE_DISK_CHG):
7643       raise errors.OpPrereqError("Cannot specify disks to be replaced",
7644                                  errors.ECODE_INVAL)
7645
7646     if self.mode == constants.REPLACE_DISK_AUTO:
7647       faulty_primary = self._FindFaultyDisks(instance.primary_node)
7648       faulty_secondary = self._FindFaultyDisks(secondary_node)
7649
7650       if faulty_primary and faulty_secondary:
7651         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7652                                    " one node and can not be repaired"
7653                                    " automatically" % self.instance_name,
7654                                    errors.ECODE_STATE)
7655
7656       if faulty_primary:
7657         self.disks = faulty_primary
7658         self.target_node = instance.primary_node
7659         self.other_node = secondary_node
7660         check_nodes = [self.target_node, self.other_node]
7661       elif faulty_secondary:
7662         self.disks = faulty_secondary
7663         self.target_node = secondary_node
7664         self.other_node = instance.primary_node
7665         check_nodes = [self.target_node, self.other_node]
7666       else:
7667         self.disks = []
7668         check_nodes = []
7669
7670     else:
7671       # Non-automatic modes
7672       if self.mode == constants.REPLACE_DISK_PRI:
7673         self.target_node = instance.primary_node
7674         self.other_node = secondary_node
7675         check_nodes = [self.target_node, self.other_node]
7676
7677       elif self.mode == constants.REPLACE_DISK_SEC:
7678         self.target_node = secondary_node
7679         self.other_node = instance.primary_node
7680         check_nodes = [self.target_node, self.other_node]
7681
7682       elif self.mode == constants.REPLACE_DISK_CHG:
7683         self.new_node = remote_node
7684         self.other_node = instance.primary_node
7685         self.target_node = secondary_node
7686         check_nodes = [self.new_node, self.other_node]
7687
7688         _CheckNodeNotDrained(self.lu, remote_node)
7689
7690         old_node_info = self.cfg.GetNodeInfo(secondary_node)
7691         assert old_node_info is not None
7692         if old_node_info.offline and not self.early_release:
7693           # doesn't make sense to delay the release
7694           self.early_release = True
7695           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7696                           " early-release mode", secondary_node)
7697
7698       else:
7699         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7700                                      self.mode)
7701
7702       # If not specified all disks should be replaced
7703       if not self.disks:
7704         self.disks = range(len(self.instance.disks))
7705
7706     for node in check_nodes:
7707       _CheckNodeOnline(self.lu, node)
7708
7709     # Check whether disks are valid
7710     for disk_idx in self.disks:
7711       instance.FindDisk(disk_idx)
7712
7713     # Get secondary node IP addresses
7714     node_2nd_ip = {}
7715
7716     for node_name in [self.target_node, self.other_node, self.new_node]:
7717       if node_name is not None:
7718         node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7719
7720     self.node_secondary_ip = node_2nd_ip
7721
7722   def Exec(self, feedback_fn):
7723     """Execute disk replacement.
7724
7725     This dispatches the disk replacement to the appropriate handler.
7726
7727     """
7728     if self.delay_iallocator:
7729       self._CheckPrereq2()
7730
7731     if not self.disks:
7732       feedback_fn("No disks need replacement")
7733       return
7734
7735     feedback_fn("Replacing disk(s) %s for %s" %
7736                 (utils.CommaJoin(self.disks), self.instance.name))
7737
7738     activate_disks = (not self.instance.admin_up)
7739
7740     # Activate the instance disks if we're replacing them on a down instance
7741     if activate_disks:
7742       _StartInstanceDisks(self.lu, self.instance, True)
7743
7744     try:
7745       # Should we replace the secondary node?
7746       if self.new_node is not None:
7747         fn = self._ExecDrbd8Secondary
7748       else:
7749         fn = self._ExecDrbd8DiskOnly
7750
7751       return fn(feedback_fn)
7752
7753     finally:
7754       # Deactivate the instance disks if we're replacing them on a
7755       # down instance
7756       if activate_disks:
7757         _SafeShutdownInstanceDisks(self.lu, self.instance)
7758
7759   def _CheckVolumeGroup(self, nodes):
7760     self.lu.LogInfo("Checking volume groups")
7761
7762     vgname = self.cfg.GetVGName()
7763
7764     # Make sure volume group exists on all involved nodes
7765     results = self.rpc.call_vg_list(nodes)
7766     if not results:
7767       raise errors.OpExecError("Can't list volume groups on the nodes")
7768
7769     for node in nodes:
7770       res = results[node]
7771       res.Raise("Error checking node %s" % node)
7772       if vgname not in res.payload:
7773         raise errors.OpExecError("Volume group '%s' not found on node %s" %
7774                                  (vgname, node))
7775
7776   def _CheckDisksExistence(self, nodes):
7777     # Check disk existence
7778     for idx, dev in enumerate(self.instance.disks):
7779       if idx not in self.disks:
7780         continue
7781
7782       for node in nodes:
7783         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7784         self.cfg.SetDiskID(dev, node)
7785
7786         result = self.rpc.call_blockdev_find(node, dev)
7787
7788         msg = result.fail_msg
7789         if msg or not result.payload:
7790           if not msg:
7791             msg = "disk not found"
7792           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7793                                    (idx, node, msg))
7794
7795   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7796     for idx, dev in enumerate(self.instance.disks):
7797       if idx not in self.disks:
7798         continue
7799
7800       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7801                       (idx, node_name))
7802
7803       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7804                                    ldisk=ldisk):
7805         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7806                                  " replace disks for instance %s" %
7807                                  (node_name, self.instance.name))
7808
7809   def _CreateNewStorage(self, node_name):
7810     vgname = self.cfg.GetVGName()
7811     iv_names = {}
7812
7813     for idx, dev in enumerate(self.instance.disks):
7814       if idx not in self.disks:
7815         continue
7816
7817       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7818
7819       self.cfg.SetDiskID(dev, node_name)
7820
7821       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7822       names = _GenerateUniqueNames(self.lu, lv_names)
7823
7824       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7825                              logical_id=(vgname, names[0]))
7826       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7827                              logical_id=(vgname, names[1]))
7828
7829       new_lvs = [lv_data, lv_meta]
7830       old_lvs = dev.children
7831       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7832
7833       # we pass force_create=True to force the LVM creation
7834       for new_lv in new_lvs:
7835         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7836                         _GetInstanceInfoText(self.instance), False)
7837
7838     return iv_names
7839
7840   def _CheckDevices(self, node_name, iv_names):
7841     for name, (dev, _, _) in iv_names.iteritems():
7842       self.cfg.SetDiskID(dev, node_name)
7843
7844       result = self.rpc.call_blockdev_find(node_name, dev)
7845
7846       msg = result.fail_msg
7847       if msg or not result.payload:
7848         if not msg:
7849           msg = "disk not found"
7850         raise errors.OpExecError("Can't find DRBD device %s: %s" %
7851                                  (name, msg))
7852
7853       if result.payload.is_degraded:
7854         raise errors.OpExecError("DRBD device %s is degraded!" % name)
7855
7856   def _RemoveOldStorage(self, node_name, iv_names):
7857     for name, (_, old_lvs, _) in iv_names.iteritems():
7858       self.lu.LogInfo("Remove logical volumes for %s" % name)
7859
7860       for lv in old_lvs:
7861         self.cfg.SetDiskID(lv, node_name)
7862
7863         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7864         if msg:
7865           self.lu.LogWarning("Can't remove old LV: %s" % msg,
7866                              hint="remove unused LVs manually")
7867
7868   def _ReleaseNodeLock(self, node_name):
7869     """Releases the lock for a given node."""
7870     self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7871
7872   def _ExecDrbd8DiskOnly(self, feedback_fn):
7873     """Replace a disk on the primary or secondary for DRBD 8.
7874
7875     The algorithm for replace is quite complicated:
7876
7877       1. for each disk to be replaced:
7878
7879         1. create new LVs on the target node with unique names
7880         1. detach old LVs from the drbd device
7881         1. rename old LVs to name_replaced.<time_t>
7882         1. rename new LVs to old LVs
7883         1. attach the new LVs (with the old names now) to the drbd device
7884
7885       1. wait for sync across all devices
7886
7887       1. for each modified disk:
7888
7889         1. remove old LVs (which have the name name_replaces.<time_t>)
7890
7891     Failures are not very well handled.
7892
7893     """
7894     steps_total = 6
7895
7896     # Step: check device activation
7897     self.lu.LogStep(1, steps_total, "Check device existence")
7898     self._CheckDisksExistence([self.other_node, self.target_node])
7899     self._CheckVolumeGroup([self.target_node, self.other_node])
7900
7901     # Step: check other node consistency
7902     self.lu.LogStep(2, steps_total, "Check peer consistency")
7903     self._CheckDisksConsistency(self.other_node,
7904                                 self.other_node == self.instance.primary_node,
7905                                 False)
7906
7907     # Step: create new storage
7908     self.lu.LogStep(3, steps_total, "Allocate new storage")
7909     iv_names = self._CreateNewStorage(self.target_node)
7910
7911     # Step: for each lv, detach+rename*2+attach
7912     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7913     for dev, old_lvs, new_lvs in iv_names.itervalues():
7914       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7915
7916       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7917                                                      old_lvs)
7918       result.Raise("Can't detach drbd from local storage on node"
7919                    " %s for device %s" % (self.target_node, dev.iv_name))
7920       #dev.children = []
7921       #cfg.Update(instance)
7922
7923       # ok, we created the new LVs, so now we know we have the needed
7924       # storage; as such, we proceed on the target node to rename
7925       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7926       # using the assumption that logical_id == physical_id (which in
7927       # turn is the unique_id on that node)
7928
7929       # FIXME(iustin): use a better name for the replaced LVs
7930       temp_suffix = int(time.time())
7931       ren_fn = lambda d, suff: (d.physical_id[0],
7932                                 d.physical_id[1] + "_replaced-%s" % suff)
7933
7934       # Build the rename list based on what LVs exist on the node
7935       rename_old_to_new = []
7936       for to_ren in old_lvs:
7937         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7938         if not result.fail_msg and result.payload:
7939           # device exists
7940           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7941
7942       self.lu.LogInfo("Renaming the old LVs on the target node")
7943       result = self.rpc.call_blockdev_rename(self.target_node,
7944                                              rename_old_to_new)
7945       result.Raise("Can't rename old LVs on node %s" % self.target_node)
7946
7947       # Now we rename the new LVs to the old LVs
7948       self.lu.LogInfo("Renaming the new LVs on the target node")
7949       rename_new_to_old = [(new, old.physical_id)
7950                            for old, new in zip(old_lvs, new_lvs)]
7951       result = self.rpc.call_blockdev_rename(self.target_node,
7952                                              rename_new_to_old)
7953       result.Raise("Can't rename new LVs on node %s" % self.target_node)
7954
7955       for old, new in zip(old_lvs, new_lvs):
7956         new.logical_id = old.logical_id
7957         self.cfg.SetDiskID(new, self.target_node)
7958
7959       for disk in old_lvs:
7960         disk.logical_id = ren_fn(disk, temp_suffix)
7961         self.cfg.SetDiskID(disk, self.target_node)
7962
7963       # Now that the new lvs have the old name, we can add them to the device
7964       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7965       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7966                                                   new_lvs)
7967       msg = result.fail_msg
7968       if msg:
7969         for new_lv in new_lvs:
7970           msg2 = self.rpc.call_blockdev_remove(self.target_node,
7971                                                new_lv).fail_msg
7972           if msg2:
7973             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7974                                hint=("cleanup manually the unused logical"
7975                                      "volumes"))
7976         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7977
7978       dev.children = new_lvs
7979
7980       self.cfg.Update(self.instance, feedback_fn)
7981
7982     cstep = 5
7983     if self.early_release:
7984       self.lu.LogStep(cstep, steps_total, "Removing old storage")
7985       cstep += 1
7986       self._RemoveOldStorage(self.target_node, iv_names)
7987       # WARNING: we release both node locks here, do not do other RPCs
7988       # than WaitForSync to the primary node
7989       self._ReleaseNodeLock([self.target_node, self.other_node])
7990
7991     # Wait for sync
7992     # This can fail as the old devices are degraded and _WaitForSync
7993     # does a combined result over all disks, so we don't check its return value
7994     self.lu.LogStep(cstep, steps_total, "Sync devices")
7995     cstep += 1
7996     _WaitForSync(self.lu, self.instance)
7997
7998     # Check all devices manually
7999     self._CheckDevices(self.instance.primary_node, iv_names)
8000
8001     # Step: remove old storage
8002     if not self.early_release:
8003       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8004       cstep += 1
8005       self._RemoveOldStorage(self.target_node, iv_names)
8006
8007   def _ExecDrbd8Secondary(self, feedback_fn):
8008     """Replace the secondary node for DRBD 8.
8009
8010     The algorithm for replace is quite complicated:
8011       - for all disks of the instance:
8012         - create new LVs on the new node with same names
8013         - shutdown the drbd device on the old secondary
8014         - disconnect the drbd network on the primary
8015         - create the drbd device on the new secondary
8016         - network attach the drbd on the primary, using an artifice:
8017           the drbd code for Attach() will connect to the network if it
8018           finds a device which is connected to the good local disks but
8019           not network enabled
8020       - wait for sync across all devices
8021       - remove all disks from the old secondary
8022
8023     Failures are not very well handled.
8024
8025     """
8026     steps_total = 6
8027
8028     # Step: check device activation
8029     self.lu.LogStep(1, steps_total, "Check device existence")
8030     self._CheckDisksExistence([self.instance.primary_node])
8031     self._CheckVolumeGroup([self.instance.primary_node])
8032
8033     # Step: check other node consistency
8034     self.lu.LogStep(2, steps_total, "Check peer consistency")
8035     self._CheckDisksConsistency(self.instance.primary_node, True, True)
8036
8037     # Step: create new storage
8038     self.lu.LogStep(3, steps_total, "Allocate new storage")
8039     for idx, dev in enumerate(self.instance.disks):
8040       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
8041                       (self.new_node, idx))
8042       # we pass force_create=True to force LVM creation
8043       for new_lv in dev.children:
8044         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
8045                         _GetInstanceInfoText(self.instance), False)
8046
8047     # Step 4: dbrd minors and drbd setups changes
8048     # after this, we must manually remove the drbd minors on both the
8049     # error and the success paths
8050     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
8051     minors = self.cfg.AllocateDRBDMinor([self.new_node
8052                                          for dev in self.instance.disks],
8053                                         self.instance.name)
8054     logging.debug("Allocated minors %r", minors)
8055
8056     iv_names = {}
8057     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
8058       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
8059                       (self.new_node, idx))
8060       # create new devices on new_node; note that we create two IDs:
8061       # one without port, so the drbd will be activated without
8062       # networking information on the new node at this stage, and one
8063       # with network, for the latter activation in step 4
8064       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
8065       if self.instance.primary_node == o_node1:
8066         p_minor = o_minor1
8067       else:
8068         assert self.instance.primary_node == o_node2, "Three-node instance?"
8069         p_minor = o_minor2
8070
8071       new_alone_id = (self.instance.primary_node, self.new_node, None,
8072                       p_minor, new_minor, o_secret)
8073       new_net_id = (self.instance.primary_node, self.new_node, o_port,
8074                     p_minor, new_minor, o_secret)
8075
8076       iv_names[idx] = (dev, dev.children, new_net_id)
8077       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
8078                     new_net_id)
8079       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
8080                               logical_id=new_alone_id,
8081                               children=dev.children,
8082                               size=dev.size)
8083       try:
8084         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
8085                               _GetInstanceInfoText(self.instance), False)
8086       except errors.GenericError:
8087         self.cfg.ReleaseDRBDMinors(self.instance.name)
8088         raise
8089
8090     # We have new devices, shutdown the drbd on the old secondary
8091     for idx, dev in enumerate(self.instance.disks):
8092       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
8093       self.cfg.SetDiskID(dev, self.target_node)
8094       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
8095       if msg:
8096         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
8097                            "node: %s" % (idx, msg),
8098                            hint=("Please cleanup this device manually as"
8099                                  " soon as possible"))
8100
8101     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8102     result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8103                                                self.node_secondary_ip,
8104                                                self.instance.disks)\
8105                                               [self.instance.primary_node]
8106
8107     msg = result.fail_msg
8108     if msg:
8109       # detaches didn't succeed (unlikely)
8110       self.cfg.ReleaseDRBDMinors(self.instance.name)
8111       raise errors.OpExecError("Can't detach the disks from the network on"
8112                                " old node: %s" % (msg,))
8113
8114     # if we managed to detach at least one, we update all the disks of
8115     # the instance to point to the new secondary
8116     self.lu.LogInfo("Updating instance configuration")
8117     for dev, _, new_logical_id in iv_names.itervalues():
8118       dev.logical_id = new_logical_id
8119       self.cfg.SetDiskID(dev, self.instance.primary_node)
8120
8121     self.cfg.Update(self.instance, feedback_fn)
8122
8123     # and now perform the drbd attach
8124     self.lu.LogInfo("Attaching primary drbds to new secondary"
8125                     " (standalone => connected)")
8126     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8127                                             self.new_node],
8128                                            self.node_secondary_ip,
8129                                            self.instance.disks,
8130                                            self.instance.name,
8131                                            False)
8132     for to_node, to_result in result.items():
8133       msg = to_result.fail_msg
8134       if msg:
8135         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8136                            to_node, msg,
8137                            hint=("please do a gnt-instance info to see the"
8138                                  " status of disks"))
8139     cstep = 5
8140     if self.early_release:
8141       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8142       cstep += 1
8143       self._RemoveOldStorage(self.target_node, iv_names)
8144       # WARNING: we release all node locks here, do not do other RPCs
8145       # than WaitForSync to the primary node
8146       self._ReleaseNodeLock([self.instance.primary_node,
8147                              self.target_node,
8148                              self.new_node])
8149
8150     # Wait for sync
8151     # This can fail as the old devices are degraded and _WaitForSync
8152     # does a combined result over all disks, so we don't check its return value
8153     self.lu.LogStep(cstep, steps_total, "Sync devices")
8154     cstep += 1
8155     _WaitForSync(self.lu, self.instance)
8156
8157     # Check all devices manually
8158     self._CheckDevices(self.instance.primary_node, iv_names)
8159
8160     # Step: remove old storage
8161     if not self.early_release:
8162       self.lu.LogStep(cstep, steps_total, "Removing old storage")
8163       self._RemoveOldStorage(self.target_node, iv_names)
8164
8165
8166 class LURepairNodeStorage(NoHooksLU):
8167   """Repairs the volume group on a node.
8168
8169   """
8170   _OP_PARAMS = [
8171     _PNodeName,
8172     ("storage_type", ht.NoDefault, _CheckStorageType),
8173     ("name", ht.NoDefault, ht.TNonEmptyString),
8174     ("ignore_consistency", False, ht.TBool),
8175     ]
8176   REQ_BGL = False
8177
8178   def CheckArguments(self):
8179     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8180
8181     storage_type = self.op.storage_type
8182
8183     if (constants.SO_FIX_CONSISTENCY not in
8184         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8185       raise errors.OpPrereqError("Storage units of type '%s' can not be"
8186                                  " repaired" % storage_type,
8187                                  errors.ECODE_INVAL)
8188
8189   def ExpandNames(self):
8190     self.needed_locks = {
8191       locking.LEVEL_NODE: [self.op.node_name],
8192       }
8193
8194   def _CheckFaultyDisks(self, instance, node_name):
8195     """Ensure faulty disks abort the opcode or at least warn."""
8196     try:
8197       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8198                                   node_name, True):
8199         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8200                                    " node '%s'" % (instance.name, node_name),
8201                                    errors.ECODE_STATE)
8202     except errors.OpPrereqError, err:
8203       if self.op.ignore_consistency:
8204         self.proc.LogWarning(str(err.args[0]))
8205       else:
8206         raise
8207
8208   def CheckPrereq(self):
8209     """Check prerequisites.
8210
8211     """
8212     # Check whether any instance on this node has faulty disks
8213     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8214       if not inst.admin_up:
8215         continue
8216       check_nodes = set(inst.all_nodes)
8217       check_nodes.discard(self.op.node_name)
8218       for inst_node_name in check_nodes:
8219         self._CheckFaultyDisks(inst, inst_node_name)
8220
8221   def Exec(self, feedback_fn):
8222     feedback_fn("Repairing storage unit '%s' on %s ..." %
8223                 (self.op.name, self.op.node_name))
8224
8225     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8226     result = self.rpc.call_storage_execute(self.op.node_name,
8227                                            self.op.storage_type, st_args,
8228                                            self.op.name,
8229                                            constants.SO_FIX_CONSISTENCY)
8230     result.Raise("Failed to repair storage unit '%s' on %s" %
8231                  (self.op.name, self.op.node_name))
8232
8233
8234 class LUNodeEvacuationStrategy(NoHooksLU):
8235   """Computes the node evacuation strategy.
8236
8237   """
8238   _OP_PARAMS = [
8239     ("nodes", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
8240     ("remote_node", None, ht.TMaybeString),
8241     ("iallocator", None, ht.TMaybeString),
8242     ]
8243   REQ_BGL = False
8244
8245   def CheckArguments(self):
8246     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
8247
8248   def ExpandNames(self):
8249     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8250     self.needed_locks = locks = {}
8251     if self.op.remote_node is None:
8252       locks[locking.LEVEL_NODE] = locking.ALL_SET
8253     else:
8254       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8255       locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8256
8257   def Exec(self, feedback_fn):
8258     if self.op.remote_node is not None:
8259       instances = []
8260       for node in self.op.nodes:
8261         instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8262       result = []
8263       for i in instances:
8264         if i.primary_node == self.op.remote_node:
8265           raise errors.OpPrereqError("Node %s is the primary node of"
8266                                      " instance %s, cannot use it as"
8267                                      " secondary" %
8268                                      (self.op.remote_node, i.name),
8269                                      errors.ECODE_INVAL)
8270         result.append([i.name, self.op.remote_node])
8271     else:
8272       ial = IAllocator(self.cfg, self.rpc,
8273                        mode=constants.IALLOCATOR_MODE_MEVAC,
8274                        evac_nodes=self.op.nodes)
8275       ial.Run(self.op.iallocator, validate=True)
8276       if not ial.success:
8277         raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8278                                  errors.ECODE_NORES)
8279       result = ial.result
8280     return result
8281
8282
8283 class LUGrowDisk(LogicalUnit):
8284   """Grow a disk of an instance.
8285
8286   """
8287   HPATH = "disk-grow"
8288   HTYPE = constants.HTYPE_INSTANCE
8289   _OP_PARAMS = [
8290     _PInstanceName,
8291     ("disk", ht.NoDefault, ht.TInt),
8292     ("amount", ht.NoDefault, ht.TInt),
8293     ("wait_for_sync", True, ht.TBool),
8294     ]
8295   REQ_BGL = False
8296
8297   def ExpandNames(self):
8298     self._ExpandAndLockInstance()
8299     self.needed_locks[locking.LEVEL_NODE] = []
8300     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8301
8302   def DeclareLocks(self, level):
8303     if level == locking.LEVEL_NODE:
8304       self._LockInstancesNodes()
8305
8306   def BuildHooksEnv(self):
8307     """Build hooks env.
8308
8309     This runs on the master, the primary and all the secondaries.
8310
8311     """
8312     env = {
8313       "DISK": self.op.disk,
8314       "AMOUNT": self.op.amount,
8315       }
8316     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8317     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8318     return env, nl, nl
8319
8320   def CheckPrereq(self):
8321     """Check prerequisites.
8322
8323     This checks that the instance is in the cluster.
8324
8325     """
8326     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8327     assert instance is not None, \
8328       "Cannot retrieve locked instance %s" % self.op.instance_name
8329     nodenames = list(instance.all_nodes)
8330     for node in nodenames:
8331       _CheckNodeOnline(self, node)
8332
8333     self.instance = instance
8334
8335     if instance.disk_template not in constants.DTS_GROWABLE:
8336       raise errors.OpPrereqError("Instance's disk layout does not support"
8337                                  " growing.", errors.ECODE_INVAL)
8338
8339     self.disk = instance.FindDisk(self.op.disk)
8340
8341     if instance.disk_template != constants.DT_FILE:
8342       # TODO: check the free disk space for file, when that feature will be
8343       # supported
8344       _CheckNodesFreeDisk(self, nodenames, self.op.amount)
8345
8346   def Exec(self, feedback_fn):
8347     """Execute disk grow.
8348
8349     """
8350     instance = self.instance
8351     disk = self.disk
8352
8353     disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8354     if not disks_ok:
8355       raise errors.OpExecError("Cannot activate block device to grow")
8356
8357     for node in instance.all_nodes:
8358       self.cfg.SetDiskID(disk, node)
8359       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8360       result.Raise("Grow request failed to node %s" % node)
8361
8362       # TODO: Rewrite code to work properly
8363       # DRBD goes into sync mode for a short amount of time after executing the
8364       # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8365       # calling "resize" in sync mode fails. Sleeping for a short amount of
8366       # time is a work-around.
8367       time.sleep(5)
8368
8369     disk.RecordGrow(self.op.amount)
8370     self.cfg.Update(instance, feedback_fn)
8371     if self.op.wait_for_sync:
8372       disk_abort = not _WaitForSync(self, instance, disks=[disk])
8373       if disk_abort:
8374         self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8375                              " status.\nPlease check the instance.")
8376       if not instance.admin_up:
8377         _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8378     elif not instance.admin_up:
8379       self.proc.LogWarning("Not shutting down the disk even if the instance is"
8380                            " not supposed to be running because no wait for"
8381                            " sync mode was requested.")
8382
8383
8384 class LUQueryInstanceData(NoHooksLU):
8385   """Query runtime instance data.
8386
8387   """
8388   _OP_PARAMS = [
8389     ("instances", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
8390     ("static", False, ht.TBool),
8391     ]
8392   REQ_BGL = False
8393
8394   def ExpandNames(self):
8395     self.needed_locks = {}
8396     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8397
8398     if self.op.instances:
8399       self.wanted_names = []
8400       for name in self.op.instances:
8401         full_name = _ExpandInstanceName(self.cfg, name)
8402         self.wanted_names.append(full_name)
8403       self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8404     else:
8405       self.wanted_names = None
8406       self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8407
8408     self.needed_locks[locking.LEVEL_NODE] = []
8409     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8410
8411   def DeclareLocks(self, level):
8412     if level == locking.LEVEL_NODE:
8413       self._LockInstancesNodes()
8414
8415   def CheckPrereq(self):
8416     """Check prerequisites.
8417
8418     This only checks the optional instance list against the existing names.
8419
8420     """
8421     if self.wanted_names is None:
8422       self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8423
8424     self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8425                              in self.wanted_names]
8426
8427   def _ComputeBlockdevStatus(self, node, instance_name, dev):
8428     """Returns the status of a block device
8429
8430     """
8431     if self.op.static or not node:
8432       return None
8433
8434     self.cfg.SetDiskID(dev, node)
8435
8436     result = self.rpc.call_blockdev_find(node, dev)
8437     if result.offline:
8438       return None
8439
8440     result.Raise("Can't compute disk status for %s" % instance_name)
8441
8442     status = result.payload
8443     if status is None:
8444       return None
8445
8446     return (status.dev_path, status.major, status.minor,
8447             status.sync_percent, status.estimated_time,
8448             status.is_degraded, status.ldisk_status)
8449
8450   def _ComputeDiskStatus(self, instance, snode, dev):
8451     """Compute block device status.
8452
8453     """
8454     if dev.dev_type in constants.LDS_DRBD:
8455       # we change the snode then (otherwise we use the one passed in)
8456       if dev.logical_id[0] == instance.primary_node:
8457         snode = dev.logical_id[1]
8458       else:
8459         snode = dev.logical_id[0]
8460
8461     dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8462                                               instance.name, dev)
8463     dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8464
8465     if dev.children:
8466       dev_children = [self._ComputeDiskStatus(instance, snode, child)
8467                       for child in dev.children]
8468     else:
8469       dev_children = []
8470
8471     data = {
8472       "iv_name": dev.iv_name,
8473       "dev_type": dev.dev_type,
8474       "logical_id": dev.logical_id,
8475       "physical_id": dev.physical_id,
8476       "pstatus": dev_pstatus,
8477       "sstatus": dev_sstatus,
8478       "children": dev_children,
8479       "mode": dev.mode,
8480       "size": dev.size,
8481       }
8482
8483     return data
8484
8485   def Exec(self, feedback_fn):
8486     """Gather and return data"""
8487     result = {}
8488
8489     cluster = self.cfg.GetClusterInfo()
8490
8491     for instance in self.wanted_instances:
8492       if not self.op.static:
8493         remote_info = self.rpc.call_instance_info(instance.primary_node,
8494                                                   instance.name,
8495                                                   instance.hypervisor)
8496         remote_info.Raise("Error checking node %s" % instance.primary_node)
8497         remote_info = remote_info.payload
8498         if remote_info and "state" in remote_info:
8499           remote_state = "up"
8500         else:
8501           remote_state = "down"
8502       else:
8503         remote_state = None
8504       if instance.admin_up:
8505         config_state = "up"
8506       else:
8507         config_state = "down"
8508
8509       disks = [self._ComputeDiskStatus(instance, None, device)
8510                for device in instance.disks]
8511
8512       idict = {
8513         "name": instance.name,
8514         "config_state": config_state,
8515         "run_state": remote_state,
8516         "pnode": instance.primary_node,
8517         "snodes": instance.secondary_nodes,
8518         "os": instance.os,
8519         # this happens to be the same format used for hooks
8520         "nics": _NICListToTuple(self, instance.nics),
8521         "disk_template": instance.disk_template,
8522         "disks": disks,
8523         "hypervisor": instance.hypervisor,
8524         "network_port": instance.network_port,
8525         "hv_instance": instance.hvparams,
8526         "hv_actual": cluster.FillHV(instance, skip_globals=True),
8527         "be_instance": instance.beparams,
8528         "be_actual": cluster.FillBE(instance),
8529         "os_instance": instance.osparams,
8530         "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8531         "serial_no": instance.serial_no,
8532         "mtime": instance.mtime,
8533         "ctime": instance.ctime,
8534         "uuid": instance.uuid,
8535         }
8536
8537       result[instance.name] = idict
8538
8539     return result
8540
8541
8542 class LUSetInstanceParams(LogicalUnit):
8543   """Modifies an instances's parameters.
8544
8545   """
8546   HPATH = "instance-modify"
8547   HTYPE = constants.HTYPE_INSTANCE
8548   _OP_PARAMS = [
8549     _PInstanceName,
8550     ("nics", ht.EmptyList, ht.TList),
8551     ("disks", ht.EmptyList, ht.TList),
8552     ("beparams", ht.EmptyDict, ht.TDict),
8553     ("hvparams", ht.EmptyDict, ht.TDict),
8554     ("disk_template", None, ht.TMaybeString),
8555     ("remote_node", None, ht.TMaybeString),
8556     ("os_name", None, ht.TMaybeString),
8557     ("force_variant", False, ht.TBool),
8558     ("osparams", None, ht.TOr(ht.TDict, ht.TNone)),
8559     _PForce,
8560     ]
8561   REQ_BGL = False
8562
8563   def CheckArguments(self):
8564     if not (self.op.nics or self.op.disks or self.op.disk_template or
8565             self.op.hvparams or self.op.beparams or self.op.os_name):
8566       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8567
8568     if self.op.hvparams:
8569       _CheckGlobalHvParams(self.op.hvparams)
8570
8571     # Disk validation
8572     disk_addremove = 0
8573     for disk_op, disk_dict in self.op.disks:
8574       utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8575       if disk_op == constants.DDM_REMOVE:
8576         disk_addremove += 1
8577         continue
8578       elif disk_op == constants.DDM_ADD:
8579         disk_addremove += 1
8580       else:
8581         if not isinstance(disk_op, int):
8582           raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8583         if not isinstance(disk_dict, dict):
8584           msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8585           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8586
8587       if disk_op == constants.DDM_ADD:
8588         mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8589         if mode not in constants.DISK_ACCESS_SET:
8590           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8591                                      errors.ECODE_INVAL)
8592         size = disk_dict.get('size', None)
8593         if size is None:
8594           raise errors.OpPrereqError("Required disk parameter size missing",
8595                                      errors.ECODE_INVAL)
8596         try:
8597           size = int(size)
8598         except (TypeError, ValueError), err:
8599           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8600                                      str(err), errors.ECODE_INVAL)
8601         disk_dict['size'] = size
8602       else:
8603         # modification of disk
8604         if 'size' in disk_dict:
8605           raise errors.OpPrereqError("Disk size change not possible, use"
8606                                      " grow-disk", errors.ECODE_INVAL)
8607
8608     if disk_addremove > 1:
8609       raise errors.OpPrereqError("Only one disk add or remove operation"
8610                                  " supported at a time", errors.ECODE_INVAL)
8611
8612     if self.op.disks and self.op.disk_template is not None:
8613       raise errors.OpPrereqError("Disk template conversion and other disk"
8614                                  " changes not supported at the same time",
8615                                  errors.ECODE_INVAL)
8616
8617     if self.op.disk_template:
8618       _CheckDiskTemplate(self.op.disk_template)
8619       if (self.op.disk_template in constants.DTS_NET_MIRROR and
8620           self.op.remote_node is None):
8621         raise errors.OpPrereqError("Changing the disk template to a mirrored"
8622                                    " one requires specifying a secondary node",
8623                                    errors.ECODE_INVAL)
8624
8625     # NIC validation
8626     nic_addremove = 0
8627     for nic_op, nic_dict in self.op.nics:
8628       utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8629       if nic_op == constants.DDM_REMOVE:
8630         nic_addremove += 1
8631         continue
8632       elif nic_op == constants.DDM_ADD:
8633         nic_addremove += 1
8634       else:
8635         if not isinstance(nic_op, int):
8636           raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8637         if not isinstance(nic_dict, dict):
8638           msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8639           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8640
8641       # nic_dict should be a dict
8642       nic_ip = nic_dict.get('ip', None)
8643       if nic_ip is not None:
8644         if nic_ip.lower() == constants.VALUE_NONE:
8645           nic_dict['ip'] = None
8646         else:
8647           if not netutils.IPAddress.IsValid(nic_ip):
8648             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8649                                        errors.ECODE_INVAL)
8650
8651       nic_bridge = nic_dict.get('bridge', None)
8652       nic_link = nic_dict.get('link', None)
8653       if nic_bridge and nic_link:
8654         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8655                                    " at the same time", errors.ECODE_INVAL)
8656       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8657         nic_dict['bridge'] = None
8658       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8659         nic_dict['link'] = None
8660
8661       if nic_op == constants.DDM_ADD:
8662         nic_mac = nic_dict.get('mac', None)
8663         if nic_mac is None:
8664           nic_dict['mac'] = constants.VALUE_AUTO
8665
8666       if 'mac' in nic_dict:
8667         nic_mac = nic_dict['mac']
8668         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8669           nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8670
8671         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8672           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8673                                      " modifying an existing nic",
8674                                      errors.ECODE_INVAL)
8675
8676     if nic_addremove > 1:
8677       raise errors.OpPrereqError("Only one NIC add or remove operation"
8678                                  " supported at a time", errors.ECODE_INVAL)
8679
8680   def ExpandNames(self):
8681     self._ExpandAndLockInstance()
8682     self.needed_locks[locking.LEVEL_NODE] = []
8683     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8684
8685   def DeclareLocks(self, level):
8686     if level == locking.LEVEL_NODE:
8687       self._LockInstancesNodes()
8688       if self.op.disk_template and self.op.remote_node:
8689         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8690         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8691
8692   def BuildHooksEnv(self):
8693     """Build hooks env.
8694
8695     This runs on the master, primary and secondaries.
8696
8697     """
8698     args = dict()
8699     if constants.BE_MEMORY in self.be_new:
8700       args['memory'] = self.be_new[constants.BE_MEMORY]
8701     if constants.BE_VCPUS in self.be_new:
8702       args['vcpus'] = self.be_new[constants.BE_VCPUS]
8703     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8704     # information at all.
8705     if self.op.nics:
8706       args['nics'] = []
8707       nic_override = dict(self.op.nics)
8708       for idx, nic in enumerate(self.instance.nics):
8709         if idx in nic_override:
8710           this_nic_override = nic_override[idx]
8711         else:
8712           this_nic_override = {}
8713         if 'ip' in this_nic_override:
8714           ip = this_nic_override['ip']
8715         else:
8716           ip = nic.ip
8717         if 'mac' in this_nic_override:
8718           mac = this_nic_override['mac']
8719         else:
8720           mac = nic.mac
8721         if idx in self.nic_pnew:
8722           nicparams = self.nic_pnew[idx]
8723         else:
8724           nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
8725         mode = nicparams[constants.NIC_MODE]
8726         link = nicparams[constants.NIC_LINK]
8727         args['nics'].append((ip, mac, mode, link))
8728       if constants.DDM_ADD in nic_override:
8729         ip = nic_override[constants.DDM_ADD].get('ip', None)
8730         mac = nic_override[constants.DDM_ADD]['mac']
8731         nicparams = self.nic_pnew[constants.DDM_ADD]
8732         mode = nicparams[constants.NIC_MODE]
8733         link = nicparams[constants.NIC_LINK]
8734         args['nics'].append((ip, mac, mode, link))
8735       elif constants.DDM_REMOVE in nic_override:
8736         del args['nics'][-1]
8737
8738     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8739     if self.op.disk_template:
8740       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8741     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8742     return env, nl, nl
8743
8744   def CheckPrereq(self):
8745     """Check prerequisites.
8746
8747     This only checks the instance list against the existing names.
8748
8749     """
8750     # checking the new params on the primary/secondary nodes
8751
8752     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8753     cluster = self.cluster = self.cfg.GetClusterInfo()
8754     assert self.instance is not None, \
8755       "Cannot retrieve locked instance %s" % self.op.instance_name
8756     pnode = instance.primary_node
8757     nodelist = list(instance.all_nodes)
8758
8759     # OS change
8760     if self.op.os_name and not self.op.force:
8761       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8762                       self.op.force_variant)
8763       instance_os = self.op.os_name
8764     else:
8765       instance_os = instance.os
8766
8767     if self.op.disk_template:
8768       if instance.disk_template == self.op.disk_template:
8769         raise errors.OpPrereqError("Instance already has disk template %s" %
8770                                    instance.disk_template, errors.ECODE_INVAL)
8771
8772       if (instance.disk_template,
8773           self.op.disk_template) not in self._DISK_CONVERSIONS:
8774         raise errors.OpPrereqError("Unsupported disk template conversion from"
8775                                    " %s to %s" % (instance.disk_template,
8776                                                   self.op.disk_template),
8777                                    errors.ECODE_INVAL)
8778       _CheckInstanceDown(self, instance, "cannot change disk template")
8779       if self.op.disk_template in constants.DTS_NET_MIRROR:
8780         if self.op.remote_node == pnode:
8781           raise errors.OpPrereqError("Given new secondary node %s is the same"
8782                                      " as the primary node of the instance" %
8783                                      self.op.remote_node, errors.ECODE_STATE)
8784         _CheckNodeOnline(self, self.op.remote_node)
8785         _CheckNodeNotDrained(self, self.op.remote_node)
8786         disks = [{"size": d.size} for d in instance.disks]
8787         required = _ComputeDiskSize(self.op.disk_template, disks)
8788         _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8789
8790     # hvparams processing
8791     if self.op.hvparams:
8792       hv_type = instance.hypervisor
8793       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
8794       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
8795       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
8796
8797       # local check
8798       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
8799       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8800       self.hv_new = hv_new # the new actual values
8801       self.hv_inst = i_hvdict # the new dict (without defaults)
8802     else:
8803       self.hv_new = self.hv_inst = {}
8804
8805     # beparams processing
8806     if self.op.beparams:
8807       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
8808                                    use_none=True)
8809       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
8810       be_new = cluster.SimpleFillBE(i_bedict)
8811       self.be_new = be_new # the new actual values
8812       self.be_inst = i_bedict # the new dict (without defaults)
8813     else:
8814       self.be_new = self.be_inst = {}
8815
8816     # osparams processing
8817     if self.op.osparams:
8818       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
8819       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
8820       self.os_new = cluster.SimpleFillOS(instance_os, i_osdict)
8821       self.os_inst = i_osdict # the new dict (without defaults)
8822     else:
8823       self.os_new = self.os_inst = {}
8824
8825     self.warn = []
8826
8827     if constants.BE_MEMORY in self.op.beparams and not self.op.force:
8828       mem_check_list = [pnode]
8829       if be_new[constants.BE_AUTO_BALANCE]:
8830         # either we changed auto_balance to yes or it was from before
8831         mem_check_list.extend(instance.secondary_nodes)
8832       instance_info = self.rpc.call_instance_info(pnode, instance.name,
8833                                                   instance.hypervisor)
8834       nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8835                                          instance.hypervisor)
8836       pninfo = nodeinfo[pnode]
8837       msg = pninfo.fail_msg
8838       if msg:
8839         # Assume the primary node is unreachable and go ahead
8840         self.warn.append("Can't get info from primary node %s: %s" %
8841                          (pnode,  msg))
8842       elif not isinstance(pninfo.payload.get('memory_free', None), int):
8843         self.warn.append("Node data from primary node %s doesn't contain"
8844                          " free memory information" % pnode)
8845       elif instance_info.fail_msg:
8846         self.warn.append("Can't get instance runtime information: %s" %
8847                         instance_info.fail_msg)
8848       else:
8849         if instance_info.payload:
8850           current_mem = int(instance_info.payload['memory'])
8851         else:
8852           # Assume instance not running
8853           # (there is a slight race condition here, but it's not very probable,
8854           # and we have no other way to check)
8855           current_mem = 0
8856         miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8857                     pninfo.payload['memory_free'])
8858         if miss_mem > 0:
8859           raise errors.OpPrereqError("This change will prevent the instance"
8860                                      " from starting, due to %d MB of memory"
8861                                      " missing on its primary node" % miss_mem,
8862                                      errors.ECODE_NORES)
8863
8864       if be_new[constants.BE_AUTO_BALANCE]:
8865         for node, nres in nodeinfo.items():
8866           if node not in instance.secondary_nodes:
8867             continue
8868           msg = nres.fail_msg
8869           if msg:
8870             self.warn.append("Can't get info from secondary node %s: %s" %
8871                              (node, msg))
8872           elif not isinstance(nres.payload.get('memory_free', None), int):
8873             self.warn.append("Secondary node %s didn't return free"
8874                              " memory information" % node)
8875           elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8876             self.warn.append("Not enough memory to failover instance to"
8877                              " secondary node %s" % node)
8878
8879     # NIC processing
8880     self.nic_pnew = {}
8881     self.nic_pinst = {}
8882     for nic_op, nic_dict in self.op.nics:
8883       if nic_op == constants.DDM_REMOVE:
8884         if not instance.nics:
8885           raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8886                                      errors.ECODE_INVAL)
8887         continue
8888       if nic_op != constants.DDM_ADD:
8889         # an existing nic
8890         if not instance.nics:
8891           raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8892                                      " no NICs" % nic_op,
8893                                      errors.ECODE_INVAL)
8894         if nic_op < 0 or nic_op >= len(instance.nics):
8895           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8896                                      " are 0 to %d" %
8897                                      (nic_op, len(instance.nics) - 1),
8898                                      errors.ECODE_INVAL)
8899         old_nic_params = instance.nics[nic_op].nicparams
8900         old_nic_ip = instance.nics[nic_op].ip
8901       else:
8902         old_nic_params = {}
8903         old_nic_ip = None
8904
8905       update_params_dict = dict([(key, nic_dict[key])
8906                                  for key in constants.NICS_PARAMETERS
8907                                  if key in nic_dict])
8908
8909       if 'bridge' in nic_dict:
8910         update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8911
8912       new_nic_params = _GetUpdatedParams(old_nic_params,
8913                                          update_params_dict)
8914       utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
8915       new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
8916       objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8917       self.nic_pinst[nic_op] = new_nic_params
8918       self.nic_pnew[nic_op] = new_filled_nic_params
8919       new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8920
8921       if new_nic_mode == constants.NIC_MODE_BRIDGED:
8922         nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8923         msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8924         if msg:
8925           msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8926           if self.op.force:
8927             self.warn.append(msg)
8928           else:
8929             raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8930       if new_nic_mode == constants.NIC_MODE_ROUTED:
8931         if 'ip' in nic_dict:
8932           nic_ip = nic_dict['ip']
8933         else:
8934           nic_ip = old_nic_ip
8935         if nic_ip is None:
8936           raise errors.OpPrereqError('Cannot set the nic ip to None'
8937                                      ' on a routed nic', errors.ECODE_INVAL)
8938       if 'mac' in nic_dict:
8939         nic_mac = nic_dict['mac']
8940         if nic_mac is None:
8941           raise errors.OpPrereqError('Cannot set the nic mac to None',
8942                                      errors.ECODE_INVAL)
8943         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8944           # otherwise generate the mac
8945           nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8946         else:
8947           # or validate/reserve the current one
8948           try:
8949             self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8950           except errors.ReservationError:
8951             raise errors.OpPrereqError("MAC address %s already in use"
8952                                        " in cluster" % nic_mac,
8953                                        errors.ECODE_NOTUNIQUE)
8954
8955     # DISK processing
8956     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8957       raise errors.OpPrereqError("Disk operations not supported for"
8958                                  " diskless instances",
8959                                  errors.ECODE_INVAL)
8960     for disk_op, _ in self.op.disks:
8961       if disk_op == constants.DDM_REMOVE:
8962         if len(instance.disks) == 1:
8963           raise errors.OpPrereqError("Cannot remove the last disk of"
8964                                      " an instance", errors.ECODE_INVAL)
8965         _CheckInstanceDown(self, instance, "cannot remove disks")
8966
8967       if (disk_op == constants.DDM_ADD and
8968           len(instance.nics) >= constants.MAX_DISKS):
8969         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8970                                    " add more" % constants.MAX_DISKS,
8971                                    errors.ECODE_STATE)
8972       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8973         # an existing disk
8974         if disk_op < 0 or disk_op >= len(instance.disks):
8975           raise errors.OpPrereqError("Invalid disk index %s, valid values"
8976                                      " are 0 to %d" %
8977                                      (disk_op, len(instance.disks)),
8978                                      errors.ECODE_INVAL)
8979
8980     return
8981
8982   def _ConvertPlainToDrbd(self, feedback_fn):
8983     """Converts an instance from plain to drbd.
8984
8985     """
8986     feedback_fn("Converting template to drbd")
8987     instance = self.instance
8988     pnode = instance.primary_node
8989     snode = self.op.remote_node
8990
8991     # create a fake disk info for _GenerateDiskTemplate
8992     disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8993     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8994                                       instance.name, pnode, [snode],
8995                                       disk_info, None, None, 0)
8996     info = _GetInstanceInfoText(instance)
8997     feedback_fn("Creating aditional volumes...")
8998     # first, create the missing data and meta devices
8999     for disk in new_disks:
9000       # unfortunately this is... not too nice
9001       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
9002                             info, True)
9003       for child in disk.children:
9004         _CreateSingleBlockDev(self, snode, instance, child, info, True)
9005     # at this stage, all new LVs have been created, we can rename the
9006     # old ones
9007     feedback_fn("Renaming original volumes...")
9008     rename_list = [(o, n.children[0].logical_id)
9009                    for (o, n) in zip(instance.disks, new_disks)]
9010     result = self.rpc.call_blockdev_rename(pnode, rename_list)
9011     result.Raise("Failed to rename original LVs")
9012
9013     feedback_fn("Initializing DRBD devices...")
9014     # all child devices are in place, we can now create the DRBD devices
9015     for disk in new_disks:
9016       for node in [pnode, snode]:
9017         f_create = node == pnode
9018         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
9019
9020     # at this point, the instance has been modified
9021     instance.disk_template = constants.DT_DRBD8
9022     instance.disks = new_disks
9023     self.cfg.Update(instance, feedback_fn)
9024
9025     # disks are created, waiting for sync
9026     disk_abort = not _WaitForSync(self, instance)
9027     if disk_abort:
9028       raise errors.OpExecError("There are some degraded disks for"
9029                                " this instance, please cleanup manually")
9030
9031   def _ConvertDrbdToPlain(self, feedback_fn):
9032     """Converts an instance from drbd to plain.
9033
9034     """
9035     instance = self.instance
9036     assert len(instance.secondary_nodes) == 1
9037     pnode = instance.primary_node
9038     snode = instance.secondary_nodes[0]
9039     feedback_fn("Converting template to plain")
9040
9041     old_disks = instance.disks
9042     new_disks = [d.children[0] for d in old_disks]
9043
9044     # copy over size and mode
9045     for parent, child in zip(old_disks, new_disks):
9046       child.size = parent.size
9047       child.mode = parent.mode
9048
9049     # update instance structure
9050     instance.disks = new_disks
9051     instance.disk_template = constants.DT_PLAIN
9052     self.cfg.Update(instance, feedback_fn)
9053
9054     feedback_fn("Removing volumes on the secondary node...")
9055     for disk in old_disks:
9056       self.cfg.SetDiskID(disk, snode)
9057       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
9058       if msg:
9059         self.LogWarning("Could not remove block device %s on node %s,"
9060                         " continuing anyway: %s", disk.iv_name, snode, msg)
9061
9062     feedback_fn("Removing unneeded volumes on the primary node...")
9063     for idx, disk in enumerate(old_disks):
9064       meta = disk.children[1]
9065       self.cfg.SetDiskID(meta, pnode)
9066       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
9067       if msg:
9068         self.LogWarning("Could not remove metadata for disk %d on node %s,"
9069                         " continuing anyway: %s", idx, pnode, msg)
9070
9071
9072   def Exec(self, feedback_fn):
9073     """Modifies an instance.
9074
9075     All parameters take effect only at the next restart of the instance.
9076
9077     """
9078     # Process here the warnings from CheckPrereq, as we don't have a
9079     # feedback_fn there.
9080     for warn in self.warn:
9081       feedback_fn("WARNING: %s" % warn)
9082
9083     result = []
9084     instance = self.instance
9085     # disk changes
9086     for disk_op, disk_dict in self.op.disks:
9087       if disk_op == constants.DDM_REMOVE:
9088         # remove the last disk
9089         device = instance.disks.pop()
9090         device_idx = len(instance.disks)
9091         for node, disk in device.ComputeNodeTree(instance.primary_node):
9092           self.cfg.SetDiskID(disk, node)
9093           msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
9094           if msg:
9095             self.LogWarning("Could not remove disk/%d on node %s: %s,"
9096                             " continuing anyway", device_idx, node, msg)
9097         result.append(("disk/%d" % device_idx, "remove"))
9098       elif disk_op == constants.DDM_ADD:
9099         # add a new disk
9100         if instance.disk_template == constants.DT_FILE:
9101           file_driver, file_path = instance.disks[0].logical_id
9102           file_path = os.path.dirname(file_path)
9103         else:
9104           file_driver = file_path = None
9105         disk_idx_base = len(instance.disks)
9106         new_disk = _GenerateDiskTemplate(self,
9107                                          instance.disk_template,
9108                                          instance.name, instance.primary_node,
9109                                          instance.secondary_nodes,
9110                                          [disk_dict],
9111                                          file_path,
9112                                          file_driver,
9113                                          disk_idx_base)[0]
9114         instance.disks.append(new_disk)
9115         info = _GetInstanceInfoText(instance)
9116
9117         logging.info("Creating volume %s for instance %s",
9118                      new_disk.iv_name, instance.name)
9119         # Note: this needs to be kept in sync with _CreateDisks
9120         #HARDCODE
9121         for node in instance.all_nodes:
9122           f_create = node == instance.primary_node
9123           try:
9124             _CreateBlockDev(self, node, instance, new_disk,
9125                             f_create, info, f_create)
9126           except errors.OpExecError, err:
9127             self.LogWarning("Failed to create volume %s (%s) on"
9128                             " node %s: %s",
9129                             new_disk.iv_name, new_disk, node, err)
9130         result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9131                        (new_disk.size, new_disk.mode)))
9132       else:
9133         # change a given disk
9134         instance.disks[disk_op].mode = disk_dict['mode']
9135         result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9136
9137     if self.op.disk_template:
9138       r_shut = _ShutdownInstanceDisks(self, instance)
9139       if not r_shut:
9140         raise errors.OpExecError("Cannot shutdow instance disks, unable to"
9141                                  " proceed with disk template conversion")
9142       mode = (instance.disk_template, self.op.disk_template)
9143       try:
9144         self._DISK_CONVERSIONS[mode](self, feedback_fn)
9145       except:
9146         self.cfg.ReleaseDRBDMinors(instance.name)
9147         raise
9148       result.append(("disk_template", self.op.disk_template))
9149
9150     # NIC changes
9151     for nic_op, nic_dict in self.op.nics:
9152       if nic_op == constants.DDM_REMOVE:
9153         # remove the last nic
9154         del instance.nics[-1]
9155         result.append(("nic.%d" % len(instance.nics), "remove"))
9156       elif nic_op == constants.DDM_ADD:
9157         # mac and bridge should be set, by now
9158         mac = nic_dict['mac']
9159         ip = nic_dict.get('ip', None)
9160         nicparams = self.nic_pinst[constants.DDM_ADD]
9161         new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9162         instance.nics.append(new_nic)
9163         result.append(("nic.%d" % (len(instance.nics) - 1),
9164                        "add:mac=%s,ip=%s,mode=%s,link=%s" %
9165                        (new_nic.mac, new_nic.ip,
9166                         self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9167                         self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9168                        )))
9169       else:
9170         for key in 'mac', 'ip':
9171           if key in nic_dict:
9172             setattr(instance.nics[nic_op], key, nic_dict[key])
9173         if nic_op in self.nic_pinst:
9174           instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9175         for key, val in nic_dict.iteritems():
9176           result.append(("nic.%s/%d" % (key, nic_op), val))
9177
9178     # hvparams changes
9179     if self.op.hvparams:
9180       instance.hvparams = self.hv_inst
9181       for key, val in self.op.hvparams.iteritems():
9182         result.append(("hv/%s" % key, val))
9183
9184     # beparams changes
9185     if self.op.beparams:
9186       instance.beparams = self.be_inst
9187       for key, val in self.op.beparams.iteritems():
9188         result.append(("be/%s" % key, val))
9189
9190     # OS change
9191     if self.op.os_name:
9192       instance.os = self.op.os_name
9193
9194     # osparams changes
9195     if self.op.osparams:
9196       instance.osparams = self.os_inst
9197       for key, val in self.op.osparams.iteritems():
9198         result.append(("os/%s" % key, val))
9199
9200     self.cfg.Update(instance, feedback_fn)
9201
9202     return result
9203
9204   _DISK_CONVERSIONS = {
9205     (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9206     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9207     }
9208
9209
9210 class LUQueryExports(NoHooksLU):
9211   """Query the exports list
9212
9213   """
9214   _OP_PARAMS = [
9215     ("nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
9216     ("use_locking", False, ht.TBool),
9217     ]
9218   REQ_BGL = False
9219
9220   def ExpandNames(self):
9221     self.needed_locks = {}
9222     self.share_locks[locking.LEVEL_NODE] = 1
9223     if not self.op.nodes:
9224       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9225     else:
9226       self.needed_locks[locking.LEVEL_NODE] = \
9227         _GetWantedNodes(self, self.op.nodes)
9228
9229   def Exec(self, feedback_fn):
9230     """Compute the list of all the exported system images.
9231
9232     @rtype: dict
9233     @return: a dictionary with the structure node->(export-list)
9234         where export-list is a list of the instances exported on
9235         that node.
9236
9237     """
9238     self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9239     rpcresult = self.rpc.call_export_list(self.nodes)
9240     result = {}
9241     for node in rpcresult:
9242       if rpcresult[node].fail_msg:
9243         result[node] = False
9244       else:
9245         result[node] = rpcresult[node].payload
9246
9247     return result
9248
9249
9250 class LUPrepareExport(NoHooksLU):
9251   """Prepares an instance for an export and returns useful information.
9252
9253   """
9254   _OP_PARAMS = [
9255     _PInstanceName,
9256     ("mode", ht.NoDefault, ht.TElemOf(constants.EXPORT_MODES)),
9257     ]
9258   REQ_BGL = False
9259
9260   def ExpandNames(self):
9261     self._ExpandAndLockInstance()
9262
9263   def CheckPrereq(self):
9264     """Check prerequisites.
9265
9266     """
9267     instance_name = self.op.instance_name
9268
9269     self.instance = self.cfg.GetInstanceInfo(instance_name)
9270     assert self.instance is not None, \
9271           "Cannot retrieve locked instance %s" % self.op.instance_name
9272     _CheckNodeOnline(self, self.instance.primary_node)
9273
9274     self._cds = _GetClusterDomainSecret()
9275
9276   def Exec(self, feedback_fn):
9277     """Prepares an instance for an export.
9278
9279     """
9280     instance = self.instance
9281
9282     if self.op.mode == constants.EXPORT_MODE_REMOTE:
9283       salt = utils.GenerateSecret(8)
9284
9285       feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9286       result = self.rpc.call_x509_cert_create(instance.primary_node,
9287                                               constants.RIE_CERT_VALIDITY)
9288       result.Raise("Can't create X509 key and certificate on %s" % result.node)
9289
9290       (name, cert_pem) = result.payload
9291
9292       cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9293                                              cert_pem)
9294
9295       return {
9296         "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9297         "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9298                           salt),
9299         "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9300         }
9301
9302     return None
9303
9304
9305 class LUExportInstance(LogicalUnit):
9306   """Export an instance to an image in the cluster.
9307
9308   """
9309   HPATH = "instance-export"
9310   HTYPE = constants.HTYPE_INSTANCE
9311   _OP_PARAMS = [
9312     _PInstanceName,
9313     ("target_node", ht.NoDefault, ht.TOr(ht.TNonEmptyString, ht.TList)),
9314     ("shutdown", True, ht.TBool),
9315     _PShutdownTimeout,
9316     ("remove_instance", False, ht.TBool),
9317     ("ignore_remove_failures", False, ht.TBool),
9318     ("mode", constants.EXPORT_MODE_LOCAL, ht.TElemOf(constants.EXPORT_MODES)),
9319     ("x509_key_name", None, ht.TOr(ht.TList, ht.TNone)),
9320     ("destination_x509_ca", None, ht.TMaybeString),
9321     ]
9322   REQ_BGL = False
9323
9324   def CheckArguments(self):
9325     """Check the arguments.
9326
9327     """
9328     self.x509_key_name = self.op.x509_key_name
9329     self.dest_x509_ca_pem = self.op.destination_x509_ca
9330
9331     if self.op.remove_instance and not self.op.shutdown:
9332       raise errors.OpPrereqError("Can not remove instance without shutting it"
9333                                  " down before")
9334
9335     if self.op.mode == constants.EXPORT_MODE_REMOTE:
9336       if not self.x509_key_name:
9337         raise errors.OpPrereqError("Missing X509 key name for encryption",
9338                                    errors.ECODE_INVAL)
9339
9340       if not self.dest_x509_ca_pem:
9341         raise errors.OpPrereqError("Missing destination X509 CA",
9342                                    errors.ECODE_INVAL)
9343
9344   def ExpandNames(self):
9345     self._ExpandAndLockInstance()
9346
9347     # Lock all nodes for local exports
9348     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9349       # FIXME: lock only instance primary and destination node
9350       #
9351       # Sad but true, for now we have do lock all nodes, as we don't know where
9352       # the previous export might be, and in this LU we search for it and
9353       # remove it from its current node. In the future we could fix this by:
9354       #  - making a tasklet to search (share-lock all), then create the
9355       #    new one, then one to remove, after
9356       #  - removing the removal operation altogether
9357       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9358
9359   def DeclareLocks(self, level):
9360     """Last minute lock declaration."""
9361     # All nodes are locked anyway, so nothing to do here.
9362
9363   def BuildHooksEnv(self):
9364     """Build hooks env.
9365
9366     This will run on the master, primary node and target node.
9367
9368     """
9369     env = {
9370       "EXPORT_MODE": self.op.mode,
9371       "EXPORT_NODE": self.op.target_node,
9372       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9373       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9374       # TODO: Generic function for boolean env variables
9375       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9376       }
9377
9378     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9379
9380     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9381
9382     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9383       nl.append(self.op.target_node)
9384
9385     return env, nl, nl
9386
9387   def CheckPrereq(self):
9388     """Check prerequisites.
9389
9390     This checks that the instance and node names are valid.
9391
9392     """
9393     instance_name = self.op.instance_name
9394
9395     self.instance = self.cfg.GetInstanceInfo(instance_name)
9396     assert self.instance is not None, \
9397           "Cannot retrieve locked instance %s" % self.op.instance_name
9398     _CheckNodeOnline(self, self.instance.primary_node)
9399
9400     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9401       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9402       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9403       assert self.dst_node is not None
9404
9405       _CheckNodeOnline(self, self.dst_node.name)
9406       _CheckNodeNotDrained(self, self.dst_node.name)
9407
9408       self._cds = None
9409       self.dest_disk_info = None
9410       self.dest_x509_ca = None
9411
9412     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9413       self.dst_node = None
9414
9415       if len(self.op.target_node) != len(self.instance.disks):
9416         raise errors.OpPrereqError(("Received destination information for %s"
9417                                     " disks, but instance %s has %s disks") %
9418                                    (len(self.op.target_node), instance_name,
9419                                     len(self.instance.disks)),
9420                                    errors.ECODE_INVAL)
9421
9422       cds = _GetClusterDomainSecret()
9423
9424       # Check X509 key name
9425       try:
9426         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9427       except (TypeError, ValueError), err:
9428         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9429
9430       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9431         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9432                                    errors.ECODE_INVAL)
9433
9434       # Load and verify CA
9435       try:
9436         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9437       except OpenSSL.crypto.Error, err:
9438         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9439                                    (err, ), errors.ECODE_INVAL)
9440
9441       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9442       if errcode is not None:
9443         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9444                                    (msg, ), errors.ECODE_INVAL)
9445
9446       self.dest_x509_ca = cert
9447
9448       # Verify target information
9449       disk_info = []
9450       for idx, disk_data in enumerate(self.op.target_node):
9451         try:
9452           (host, port, magic) = \
9453             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9454         except errors.GenericError, err:
9455           raise errors.OpPrereqError("Target info for disk %s: %s" %
9456                                      (idx, err), errors.ECODE_INVAL)
9457
9458         disk_info.append((host, port, magic))
9459
9460       assert len(disk_info) == len(self.op.target_node)
9461       self.dest_disk_info = disk_info
9462
9463     else:
9464       raise errors.ProgrammerError("Unhandled export mode %r" %
9465                                    self.op.mode)
9466
9467     # instance disk type verification
9468     # TODO: Implement export support for file-based disks
9469     for disk in self.instance.disks:
9470       if disk.dev_type == constants.LD_FILE:
9471         raise errors.OpPrereqError("Export not supported for instances with"
9472                                    " file-based disks", errors.ECODE_INVAL)
9473
9474   def _CleanupExports(self, feedback_fn):
9475     """Removes exports of current instance from all other nodes.
9476
9477     If an instance in a cluster with nodes A..D was exported to node C, its
9478     exports will be removed from the nodes A, B and D.
9479
9480     """
9481     assert self.op.mode != constants.EXPORT_MODE_REMOTE
9482
9483     nodelist = self.cfg.GetNodeList()
9484     nodelist.remove(self.dst_node.name)
9485
9486     # on one-node clusters nodelist will be empty after the removal
9487     # if we proceed the backup would be removed because OpQueryExports
9488     # substitutes an empty list with the full cluster node list.
9489     iname = self.instance.name
9490     if nodelist:
9491       feedback_fn("Removing old exports for instance %s" % iname)
9492       exportlist = self.rpc.call_export_list(nodelist)
9493       for node in exportlist:
9494         if exportlist[node].fail_msg:
9495           continue
9496         if iname in exportlist[node].payload:
9497           msg = self.rpc.call_export_remove(node, iname).fail_msg
9498           if msg:
9499             self.LogWarning("Could not remove older export for instance %s"
9500                             " on node %s: %s", iname, node, msg)
9501
9502   def Exec(self, feedback_fn):
9503     """Export an instance to an image in the cluster.
9504
9505     """
9506     assert self.op.mode in constants.EXPORT_MODES
9507
9508     instance = self.instance
9509     src_node = instance.primary_node
9510
9511     if self.op.shutdown:
9512       # shutdown the instance, but not the disks
9513       feedback_fn("Shutting down instance %s" % instance.name)
9514       result = self.rpc.call_instance_shutdown(src_node, instance,
9515                                                self.op.shutdown_timeout)
9516       # TODO: Maybe ignore failures if ignore_remove_failures is set
9517       result.Raise("Could not shutdown instance %s on"
9518                    " node %s" % (instance.name, src_node))
9519
9520     # set the disks ID correctly since call_instance_start needs the
9521     # correct drbd minor to create the symlinks
9522     for disk in instance.disks:
9523       self.cfg.SetDiskID(disk, src_node)
9524
9525     activate_disks = (not instance.admin_up)
9526
9527     if activate_disks:
9528       # Activate the instance disks if we'exporting a stopped instance
9529       feedback_fn("Activating disks for %s" % instance.name)
9530       _StartInstanceDisks(self, instance, None)
9531
9532     try:
9533       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9534                                                      instance)
9535
9536       helper.CreateSnapshots()
9537       try:
9538         if (self.op.shutdown and instance.admin_up and
9539             not self.op.remove_instance):
9540           assert not activate_disks
9541           feedback_fn("Starting instance %s" % instance.name)
9542           result = self.rpc.call_instance_start(src_node, instance, None, None)
9543           msg = result.fail_msg
9544           if msg:
9545             feedback_fn("Failed to start instance: %s" % msg)
9546             _ShutdownInstanceDisks(self, instance)
9547             raise errors.OpExecError("Could not start instance: %s" % msg)
9548
9549         if self.op.mode == constants.EXPORT_MODE_LOCAL:
9550           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9551         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9552           connect_timeout = constants.RIE_CONNECT_TIMEOUT
9553           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9554
9555           (key_name, _, _) = self.x509_key_name
9556
9557           dest_ca_pem = \
9558             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9559                                             self.dest_x509_ca)
9560
9561           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9562                                                      key_name, dest_ca_pem,
9563                                                      timeouts)
9564       finally:
9565         helper.Cleanup()
9566
9567       # Check for backwards compatibility
9568       assert len(dresults) == len(instance.disks)
9569       assert compat.all(isinstance(i, bool) for i in dresults), \
9570              "Not all results are boolean: %r" % dresults
9571
9572     finally:
9573       if activate_disks:
9574         feedback_fn("Deactivating disks for %s" % instance.name)
9575         _ShutdownInstanceDisks(self, instance)
9576
9577     if not (compat.all(dresults) and fin_resu):
9578       failures = []
9579       if not fin_resu:
9580         failures.append("export finalization")
9581       if not compat.all(dresults):
9582         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
9583                                if not dsk)
9584         failures.append("disk export: disk(s) %s" % fdsk)
9585
9586       raise errors.OpExecError("Export failed, errors in %s" %
9587                                utils.CommaJoin(failures))
9588
9589     # At this point, the export was successful, we can cleanup/finish
9590
9591     # Remove instance if requested
9592     if self.op.remove_instance:
9593       feedback_fn("Removing instance %s" % instance.name)
9594       _RemoveInstance(self, feedback_fn, instance,
9595                       self.op.ignore_remove_failures)
9596
9597     if self.op.mode == constants.EXPORT_MODE_LOCAL:
9598       self._CleanupExports(feedback_fn)
9599
9600     return fin_resu, dresults
9601
9602
9603 class LURemoveExport(NoHooksLU):
9604   """Remove exports related to the named instance.
9605
9606   """
9607   _OP_PARAMS = [
9608     _PInstanceName,
9609     ]
9610   REQ_BGL = False
9611
9612   def ExpandNames(self):
9613     self.needed_locks = {}
9614     # We need all nodes to be locked in order for RemoveExport to work, but we
9615     # don't need to lock the instance itself, as nothing will happen to it (and
9616     # we can remove exports also for a removed instance)
9617     self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9618
9619   def Exec(self, feedback_fn):
9620     """Remove any export.
9621
9622     """
9623     instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9624     # If the instance was not found we'll try with the name that was passed in.
9625     # This will only work if it was an FQDN, though.
9626     fqdn_warn = False
9627     if not instance_name:
9628       fqdn_warn = True
9629       instance_name = self.op.instance_name
9630
9631     locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9632     exportlist = self.rpc.call_export_list(locked_nodes)
9633     found = False
9634     for node in exportlist:
9635       msg = exportlist[node].fail_msg
9636       if msg:
9637         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9638         continue
9639       if instance_name in exportlist[node].payload:
9640         found = True
9641         result = self.rpc.call_export_remove(node, instance_name)
9642         msg = result.fail_msg
9643         if msg:
9644           logging.error("Could not remove export for instance %s"
9645                         " on node %s: %s", instance_name, node, msg)
9646
9647     if fqdn_warn and not found:
9648       feedback_fn("Export not found. If trying to remove an export belonging"
9649                   " to a deleted instance please use its Fully Qualified"
9650                   " Domain Name.")
9651
9652
9653 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
9654   """Generic tags LU.
9655
9656   This is an abstract class which is the parent of all the other tags LUs.
9657
9658   """
9659
9660   def ExpandNames(self):
9661     self.needed_locks = {}
9662     if self.op.kind == constants.TAG_NODE:
9663       self.op.name = _ExpandNodeName(self.cfg, self.op.name)
9664       self.needed_locks[locking.LEVEL_NODE] = self.op.name
9665     elif self.op.kind == constants.TAG_INSTANCE:
9666       self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
9667       self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
9668
9669     # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
9670     # not possible to acquire the BGL based on opcode parameters)
9671
9672   def CheckPrereq(self):
9673     """Check prerequisites.
9674
9675     """
9676     if self.op.kind == constants.TAG_CLUSTER:
9677       self.target = self.cfg.GetClusterInfo()
9678     elif self.op.kind == constants.TAG_NODE:
9679       self.target = self.cfg.GetNodeInfo(self.op.name)
9680     elif self.op.kind == constants.TAG_INSTANCE:
9681       self.target = self.cfg.GetInstanceInfo(self.op.name)
9682     else:
9683       raise errors.OpPrereqError("Wrong tag type requested (%s)" %
9684                                  str(self.op.kind), errors.ECODE_INVAL)
9685
9686
9687 class LUGetTags(TagsLU):
9688   """Returns the tags of a given object.
9689
9690   """
9691   _OP_PARAMS = [
9692     ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
9693     # Name is only meaningful for nodes and instances
9694     ("name", ht.NoDefault, ht.TMaybeString),
9695     ]
9696   REQ_BGL = False
9697
9698   def ExpandNames(self):
9699     TagsLU.ExpandNames(self)
9700
9701     # Share locks as this is only a read operation
9702     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9703
9704   def Exec(self, feedback_fn):
9705     """Returns the tag list.
9706
9707     """
9708     return list(self.target.GetTags())
9709
9710
9711 class LUSearchTags(NoHooksLU):
9712   """Searches the tags for a given pattern.
9713
9714   """
9715   _OP_PARAMS = [
9716     ("pattern", ht.NoDefault, ht.TNonEmptyString),
9717     ]
9718   REQ_BGL = False
9719
9720   def ExpandNames(self):
9721     self.needed_locks = {}
9722
9723   def CheckPrereq(self):
9724     """Check prerequisites.
9725
9726     This checks the pattern passed for validity by compiling it.
9727
9728     """
9729     try:
9730       self.re = re.compile(self.op.pattern)
9731     except re.error, err:
9732       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9733                                  (self.op.pattern, err), errors.ECODE_INVAL)
9734
9735   def Exec(self, feedback_fn):
9736     """Returns the tag list.
9737
9738     """
9739     cfg = self.cfg
9740     tgts = [("/cluster", cfg.GetClusterInfo())]
9741     ilist = cfg.GetAllInstancesInfo().values()
9742     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9743     nlist = cfg.GetAllNodesInfo().values()
9744     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9745     results = []
9746     for path, target in tgts:
9747       for tag in target.GetTags():
9748         if self.re.search(tag):
9749           results.append((path, tag))
9750     return results
9751
9752
9753 class LUAddTags(TagsLU):
9754   """Sets a tag on a given object.
9755
9756   """
9757   _OP_PARAMS = [
9758     ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
9759     # Name is only meaningful for nodes and instances
9760     ("name", ht.NoDefault, ht.TMaybeString),
9761     ("tags", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
9762     ]
9763   REQ_BGL = False
9764
9765   def CheckPrereq(self):
9766     """Check prerequisites.
9767
9768     This checks the type and length of the tag name and value.
9769
9770     """
9771     TagsLU.CheckPrereq(self)
9772     for tag in self.op.tags:
9773       objects.TaggableObject.ValidateTag(tag)
9774
9775   def Exec(self, feedback_fn):
9776     """Sets the tag.
9777
9778     """
9779     try:
9780       for tag in self.op.tags:
9781         self.target.AddTag(tag)
9782     except errors.TagError, err:
9783       raise errors.OpExecError("Error while setting tag: %s" % str(err))
9784     self.cfg.Update(self.target, feedback_fn)
9785
9786
9787 class LUDelTags(TagsLU):
9788   """Delete a list of tags from a given object.
9789
9790   """
9791   _OP_PARAMS = [
9792     ("kind", ht.NoDefault, ht.TElemOf(constants.VALID_TAG_TYPES)),
9793     # Name is only meaningful for nodes and instances
9794     ("name", ht.NoDefault, ht.TMaybeString),
9795     ("tags", ht.NoDefault, ht.TListOf(ht.TNonEmptyString)),
9796     ]
9797   REQ_BGL = False
9798
9799   def CheckPrereq(self):
9800     """Check prerequisites.
9801
9802     This checks that we have the given tag.
9803
9804     """
9805     TagsLU.CheckPrereq(self)
9806     for tag in self.op.tags:
9807       objects.TaggableObject.ValidateTag(tag)
9808     del_tags = frozenset(self.op.tags)
9809     cur_tags = self.target.GetTags()
9810
9811     diff_tags = del_tags - cur_tags
9812     if diff_tags:
9813       diff_names = ("'%s'" % i for i in sorted(diff_tags))
9814       raise errors.OpPrereqError("Tag(s) %s not found" %
9815                                  (utils.CommaJoin(diff_names), ),
9816                                  errors.ECODE_NOENT)
9817
9818   def Exec(self, feedback_fn):
9819     """Remove the tag from the object.
9820
9821     """
9822     for tag in self.op.tags:
9823       self.target.RemoveTag(tag)
9824     self.cfg.Update(self.target, feedback_fn)
9825
9826
9827 class LUTestDelay(NoHooksLU):
9828   """Sleep for a specified amount of time.
9829
9830   This LU sleeps on the master and/or nodes for a specified amount of
9831   time.
9832
9833   """
9834   _OP_PARAMS = [
9835     ("duration", ht.NoDefault, ht.TFloat),
9836     ("on_master", True, ht.TBool),
9837     ("on_nodes", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
9838     ("repeat", 0, ht.TPositiveInt)
9839     ]
9840   REQ_BGL = False
9841
9842   def ExpandNames(self):
9843     """Expand names and set required locks.
9844
9845     This expands the node list, if any.
9846
9847     """
9848     self.needed_locks = {}
9849     if self.op.on_nodes:
9850       # _GetWantedNodes can be used here, but is not always appropriate to use
9851       # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
9852       # more information.
9853       self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
9854       self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
9855
9856   def _TestDelay(self):
9857     """Do the actual sleep.
9858
9859     """
9860     if self.op.on_master:
9861       if not utils.TestDelay(self.op.duration):
9862         raise errors.OpExecError("Error during master delay test")
9863     if self.op.on_nodes:
9864       result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
9865       for node, node_result in result.items():
9866         node_result.Raise("Failure during rpc call to node %s" % node)
9867
9868   def Exec(self, feedback_fn):
9869     """Execute the test delay opcode, with the wanted repetitions.
9870
9871     """
9872     if self.op.repeat == 0:
9873       self._TestDelay()
9874     else:
9875       top_value = self.op.repeat - 1
9876       for i in range(self.op.repeat):
9877         self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
9878         self._TestDelay()
9879
9880
9881 class LUTestJobqueue(NoHooksLU):
9882   """Utility LU to test some aspects of the job queue.
9883
9884   """
9885   _OP_PARAMS = [
9886     ("notify_waitlock", False, ht.TBool),
9887     ("notify_exec", False, ht.TBool),
9888     ("log_messages", ht.EmptyList, ht.TListOf(ht.TString)),
9889     ("fail", False, ht.TBool),
9890     ]
9891   REQ_BGL = False
9892
9893   # Must be lower than default timeout for WaitForJobChange to see whether it
9894   # notices changed jobs
9895   _CLIENT_CONNECT_TIMEOUT = 20.0
9896   _CLIENT_CONFIRM_TIMEOUT = 60.0
9897
9898   @classmethod
9899   def _NotifyUsingSocket(cls, cb, errcls):
9900     """Opens a Unix socket and waits for another program to connect.
9901
9902     @type cb: callable
9903     @param cb: Callback to send socket name to client
9904     @type errcls: class
9905     @param errcls: Exception class to use for errors
9906
9907     """
9908     # Using a temporary directory as there's no easy way to create temporary
9909     # sockets without writing a custom loop around tempfile.mktemp and
9910     # socket.bind
9911     tmpdir = tempfile.mkdtemp()
9912     try:
9913       tmpsock = utils.PathJoin(tmpdir, "sock")
9914
9915       logging.debug("Creating temporary socket at %s", tmpsock)
9916       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
9917       try:
9918         sock.bind(tmpsock)
9919         sock.listen(1)
9920
9921         # Send details to client
9922         cb(tmpsock)
9923
9924         # Wait for client to connect before continuing
9925         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
9926         try:
9927           (conn, _) = sock.accept()
9928         except socket.error, err:
9929           raise errcls("Client didn't connect in time (%s)" % err)
9930       finally:
9931         sock.close()
9932     finally:
9933       # Remove as soon as client is connected
9934       shutil.rmtree(tmpdir)
9935
9936     # Wait for client to close
9937     try:
9938       try:
9939         # pylint: disable-msg=E1101
9940         # Instance of '_socketobject' has no ... member
9941         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
9942         conn.recv(1)
9943       except socket.error, err:
9944         raise errcls("Client failed to confirm notification (%s)" % err)
9945     finally:
9946       conn.close()
9947
9948   def _SendNotification(self, test, arg, sockname):
9949     """Sends a notification to the client.
9950
9951     @type test: string
9952     @param test: Test name
9953     @param arg: Test argument (depends on test)
9954     @type sockname: string
9955     @param sockname: Socket path
9956
9957     """
9958     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
9959
9960   def _Notify(self, prereq, test, arg):
9961     """Notifies the client of a test.
9962
9963     @type prereq: bool
9964     @param prereq: Whether this is a prereq-phase test
9965     @type test: string
9966     @param test: Test name
9967     @param arg: Test argument (depends on test)
9968
9969     """
9970     if prereq:
9971       errcls = errors.OpPrereqError
9972     else:
9973       errcls = errors.OpExecError
9974
9975     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
9976                                                   test, arg),
9977                                    errcls)
9978
9979   def CheckArguments(self):
9980     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
9981     self.expandnames_calls = 0
9982
9983   def ExpandNames(self):
9984     checkargs_calls = getattr(self, "checkargs_calls", 0)
9985     if checkargs_calls < 1:
9986       raise errors.ProgrammerError("CheckArguments was not called")
9987
9988     self.expandnames_calls += 1
9989
9990     if self.op.notify_waitlock:
9991       self._Notify(True, constants.JQT_EXPANDNAMES, None)
9992
9993     self.LogInfo("Expanding names")
9994
9995     # Get lock on master node (just to get a lock, not for a particular reason)
9996     self.needed_locks = {
9997       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
9998       }
9999
10000   def Exec(self, feedback_fn):
10001     if self.expandnames_calls < 1:
10002       raise errors.ProgrammerError("ExpandNames was not called")
10003
10004     if self.op.notify_exec:
10005       self._Notify(False, constants.JQT_EXEC, None)
10006
10007     self.LogInfo("Executing")
10008
10009     if self.op.log_messages:
10010       self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
10011       for idx, msg in enumerate(self.op.log_messages):
10012         self.LogInfo("Sending log message %s", idx + 1)
10013         feedback_fn(constants.JQT_MSGPREFIX + msg)
10014         # Report how many test messages have been sent
10015         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
10016
10017     if self.op.fail:
10018       raise errors.OpExecError("Opcode failure was requested")
10019
10020     return True
10021
10022
10023 class IAllocator(object):
10024   """IAllocator framework.
10025
10026   An IAllocator instance has three sets of attributes:
10027     - cfg that is needed to query the cluster
10028     - input data (all members of the _KEYS class attribute are required)
10029     - four buffer attributes (in|out_data|text), that represent the
10030       input (to the external script) in text and data structure format,
10031       and the output from it, again in two formats
10032     - the result variables from the script (success, info, nodes) for
10033       easy usage
10034
10035   """
10036   # pylint: disable-msg=R0902
10037   # lots of instance attributes
10038   _ALLO_KEYS = [
10039     "name", "mem_size", "disks", "disk_template",
10040     "os", "tags", "nics", "vcpus", "hypervisor",
10041     ]
10042   _RELO_KEYS = [
10043     "name", "relocate_from",
10044     ]
10045   _EVAC_KEYS = [
10046     "evac_nodes",
10047     ]
10048
10049   def __init__(self, cfg, rpc, mode, **kwargs):
10050     self.cfg = cfg
10051     self.rpc = rpc
10052     # init buffer variables
10053     self.in_text = self.out_text = self.in_data = self.out_data = None
10054     # init all input fields so that pylint is happy
10055     self.mode = mode
10056     self.mem_size = self.disks = self.disk_template = None
10057     self.os = self.tags = self.nics = self.vcpus = None
10058     self.hypervisor = None
10059     self.relocate_from = None
10060     self.name = None
10061     self.evac_nodes = None
10062     # computed fields
10063     self.required_nodes = None
10064     # init result fields
10065     self.success = self.info = self.result = None
10066     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10067       keyset = self._ALLO_KEYS
10068       fn = self._AddNewInstance
10069     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10070       keyset = self._RELO_KEYS
10071       fn = self._AddRelocateInstance
10072     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10073       keyset = self._EVAC_KEYS
10074       fn = self._AddEvacuateNodes
10075     else:
10076       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
10077                                    " IAllocator" % self.mode)
10078     for key in kwargs:
10079       if key not in keyset:
10080         raise errors.ProgrammerError("Invalid input parameter '%s' to"
10081                                      " IAllocator" % key)
10082       setattr(self, key, kwargs[key])
10083
10084     for key in keyset:
10085       if key not in kwargs:
10086         raise errors.ProgrammerError("Missing input parameter '%s' to"
10087                                      " IAllocator" % key)
10088     self._BuildInputData(fn)
10089
10090   def _ComputeClusterData(self):
10091     """Compute the generic allocator input data.
10092
10093     This is the data that is independent of the actual operation.
10094
10095     """
10096     cfg = self.cfg
10097     cluster_info = cfg.GetClusterInfo()
10098     # cluster data
10099     data = {
10100       "version": constants.IALLOCATOR_VERSION,
10101       "cluster_name": cfg.GetClusterName(),
10102       "cluster_tags": list(cluster_info.GetTags()),
10103       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
10104       # we don't have job IDs
10105       }
10106     iinfo = cfg.GetAllInstancesInfo().values()
10107     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
10108
10109     # node data
10110     node_results = {}
10111     node_list = cfg.GetNodeList()
10112
10113     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
10114       hypervisor_name = self.hypervisor
10115     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
10116       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
10117     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
10118       hypervisor_name = cluster_info.enabled_hypervisors[0]
10119
10120     node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
10121                                         hypervisor_name)
10122     node_iinfo = \
10123       self.rpc.call_all_instances_info(node_list,
10124                                        cluster_info.enabled_hypervisors)
10125     for nname, nresult in node_data.items():
10126       # first fill in static (config-based) values
10127       ninfo = cfg.GetNodeInfo(nname)
10128       pnr = {
10129         "tags": list(ninfo.GetTags()),
10130         "primary_ip": ninfo.primary_ip,
10131         "secondary_ip": ninfo.secondary_ip,
10132         "offline": ninfo.offline,
10133         "drained": ninfo.drained,
10134         "master_candidate": ninfo.master_candidate,
10135         }
10136
10137       if not (ninfo.offline or ninfo.drained):
10138         nresult.Raise("Can't get data for node %s" % nname)
10139         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
10140                                 nname)
10141         remote_info = nresult.payload
10142
10143         for attr in ['memory_total', 'memory_free', 'memory_dom0',
10144                      'vg_size', 'vg_free', 'cpu_total']:
10145           if attr not in remote_info:
10146             raise errors.OpExecError("Node '%s' didn't return attribute"
10147                                      " '%s'" % (nname, attr))
10148           if not isinstance(remote_info[attr], int):
10149             raise errors.OpExecError("Node '%s' returned invalid value"
10150                                      " for '%s': %s" %
10151                                      (nname, attr, remote_info[attr]))
10152         # compute memory used by primary instances
10153         i_p_mem = i_p_up_mem = 0
10154         for iinfo, beinfo in i_list:
10155           if iinfo.primary_node == nname:
10156             i_p_mem += beinfo[constants.BE_MEMORY]
10157             if iinfo.name not in node_iinfo[nname].payload:
10158               i_used_mem = 0
10159             else:
10160               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
10161             i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
10162             remote_info['memory_free'] -= max(0, i_mem_diff)
10163
10164             if iinfo.admin_up:
10165               i_p_up_mem += beinfo[constants.BE_MEMORY]
10166
10167         # compute memory used by instances
10168         pnr_dyn = {
10169           "total_memory": remote_info['memory_total'],
10170           "reserved_memory": remote_info['memory_dom0'],
10171           "free_memory": remote_info['memory_free'],
10172           "total_disk": remote_info['vg_size'],
10173           "free_disk": remote_info['vg_free'],
10174           "total_cpus": remote_info['cpu_total'],
10175           "i_pri_memory": i_p_mem,
10176           "i_pri_up_memory": i_p_up_mem,
10177           }
10178         pnr.update(pnr_dyn)
10179
10180       node_results[nname] = pnr
10181     data["nodes"] = node_results
10182
10183     # instance data
10184     instance_data = {}
10185     for iinfo, beinfo in i_list:
10186       nic_data = []
10187       for nic in iinfo.nics:
10188         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
10189         nic_dict = {"mac": nic.mac,
10190                     "ip": nic.ip,
10191                     "mode": filled_params[constants.NIC_MODE],
10192                     "link": filled_params[constants.NIC_LINK],
10193                    }
10194         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
10195           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
10196         nic_data.append(nic_dict)
10197       pir = {
10198         "tags": list(iinfo.GetTags()),
10199         "admin_up": iinfo.admin_up,
10200         "vcpus": beinfo[constants.BE_VCPUS],
10201         "memory": beinfo[constants.BE_MEMORY],
10202         "os": iinfo.os,
10203         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
10204         "nics": nic_data,
10205         "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
10206         "disk_template": iinfo.disk_template,
10207         "hypervisor": iinfo.hypervisor,
10208         }
10209       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
10210                                                  pir["disks"])
10211       instance_data[iinfo.name] = pir
10212
10213     data["instances"] = instance_data
10214
10215     self.in_data = data
10216
10217   def _AddNewInstance(self):
10218     """Add new instance data to allocator structure.
10219
10220     This in combination with _AllocatorGetClusterData will create the
10221     correct structure needed as input for the allocator.
10222
10223     The checks for the completeness of the opcode must have already been
10224     done.
10225
10226     """
10227     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
10228
10229     if self.disk_template in constants.DTS_NET_MIRROR:
10230       self.required_nodes = 2
10231     else:
10232       self.required_nodes = 1
10233     request = {
10234       "name": self.name,
10235       "disk_template": self.disk_template,
10236       "tags": self.tags,
10237       "os": self.os,
10238       "vcpus": self.vcpus,
10239       "memory": self.mem_size,
10240       "disks": self.disks,
10241       "disk_space_total": disk_space,
10242       "nics": self.nics,
10243       "required_nodes": self.required_nodes,
10244       }
10245     return request
10246
10247   def _AddRelocateInstance(self):
10248     """Add relocate instance data to allocator structure.
10249
10250     This in combination with _IAllocatorGetClusterData will create the
10251     correct structure needed as input for the allocator.
10252
10253     The checks for the completeness of the opcode must have already been
10254     done.
10255
10256     """
10257     instance = self.cfg.GetInstanceInfo(self.name)
10258     if instance is None:
10259       raise errors.ProgrammerError("Unknown instance '%s' passed to"
10260                                    " IAllocator" % self.name)
10261
10262     if instance.disk_template not in constants.DTS_NET_MIRROR:
10263       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
10264                                  errors.ECODE_INVAL)
10265
10266     if len(instance.secondary_nodes) != 1:
10267       raise errors.OpPrereqError("Instance has not exactly one secondary node",
10268                                  errors.ECODE_STATE)
10269
10270     self.required_nodes = 1
10271     disk_sizes = [{'size': disk.size} for disk in instance.disks]
10272     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
10273
10274     request = {
10275       "name": self.name,
10276       "disk_space_total": disk_space,
10277       "required_nodes": self.required_nodes,
10278       "relocate_from": self.relocate_from,
10279       }
10280     return request
10281
10282   def _AddEvacuateNodes(self):
10283     """Add evacuate nodes data to allocator structure.
10284
10285     """
10286     request = {
10287       "evac_nodes": self.evac_nodes
10288       }
10289     return request
10290
10291   def _BuildInputData(self, fn):
10292     """Build input data structures.
10293
10294     """
10295     self._ComputeClusterData()
10296
10297     request = fn()
10298     request["type"] = self.mode
10299     self.in_data["request"] = request
10300
10301     self.in_text = serializer.Dump(self.in_data)
10302
10303   def Run(self, name, validate=True, call_fn=None):
10304     """Run an instance allocator and return the results.
10305
10306     """
10307     if call_fn is None:
10308       call_fn = self.rpc.call_iallocator_runner
10309
10310     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
10311     result.Raise("Failure while running the iallocator script")
10312
10313     self.out_text = result.payload
10314     if validate:
10315       self._ValidateResult()
10316
10317   def _ValidateResult(self):
10318     """Process the allocator results.
10319
10320     This will process and if successful save the result in
10321     self.out_data and the other parameters.
10322
10323     """
10324     try:
10325       rdict = serializer.Load(self.out_text)
10326     except Exception, err:
10327       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
10328
10329     if not isinstance(rdict, dict):
10330       raise errors.OpExecError("Can't parse iallocator results: not a dict")
10331
10332     # TODO: remove backwards compatiblity in later versions
10333     if "nodes" in rdict and "result" not in rdict:
10334       rdict["result"] = rdict["nodes"]
10335       del rdict["nodes"]
10336
10337     for key in "success", "info", "result":
10338       if key not in rdict:
10339         raise errors.OpExecError("Can't parse iallocator results:"
10340                                  " missing key '%s'" % key)
10341       setattr(self, key, rdict[key])
10342
10343     if not isinstance(rdict["result"], list):
10344       raise errors.OpExecError("Can't parse iallocator results: 'result' key"
10345                                " is not a list")
10346     self.out_data = rdict
10347
10348
10349 class LUTestAllocator(NoHooksLU):
10350   """Run allocator tests.
10351
10352   This LU runs the allocator tests
10353
10354   """
10355   _OP_PARAMS = [
10356     ("direction", ht.NoDefault,
10357      ht.TElemOf(constants.VALID_IALLOCATOR_DIRECTIONS)),
10358     ("mode", ht.NoDefault, ht.TElemOf(constants.VALID_IALLOCATOR_MODES)),
10359     ("name", ht.NoDefault, ht.TNonEmptyString),
10360     ("nics", ht.NoDefault, ht.TOr(ht.TNone, ht.TListOf(
10361       ht.TDictOf(ht.TElemOf(["mac", "ip", "bridge"]),
10362                ht.TOr(ht.TNone, ht.TNonEmptyString))))),
10363     ("disks", ht.NoDefault, ht.TOr(ht.TNone, ht.TList)),
10364     ("hypervisor", None, ht.TMaybeString),
10365     ("allocator", None, ht.TMaybeString),
10366     ("tags", ht.EmptyList, ht.TListOf(ht.TNonEmptyString)),
10367     ("mem_size", None, ht.TOr(ht.TNone, ht.TPositiveInt)),
10368     ("vcpus", None, ht.TOr(ht.TNone, ht.TPositiveInt)),
10369     ("os", None, ht.TMaybeString),
10370     ("disk_template", None, ht.TMaybeString),
10371     ("evac_nodes", None, ht.TOr(ht.TNone, ht.TListOf(ht.TNonEmptyString))),
10372     ]
10373
10374   def CheckPrereq(self):
10375     """Check prerequisites.
10376
10377     This checks the opcode parameters depending on the director and mode test.
10378
10379     """
10380     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10381       for attr in ["mem_size", "disks", "disk_template",
10382                    "os", "tags", "nics", "vcpus"]:
10383         if not hasattr(self.op, attr):
10384           raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
10385                                      attr, errors.ECODE_INVAL)
10386       iname = self.cfg.ExpandInstanceName(self.op.name)
10387       if iname is not None:
10388         raise errors.OpPrereqError("Instance '%s' already in the cluster" %
10389                                    iname, errors.ECODE_EXISTS)
10390       if not isinstance(self.op.nics, list):
10391         raise errors.OpPrereqError("Invalid parameter 'nics'",
10392                                    errors.ECODE_INVAL)
10393       if not isinstance(self.op.disks, list):
10394         raise errors.OpPrereqError("Invalid parameter 'disks'",
10395                                    errors.ECODE_INVAL)
10396       for row in self.op.disks:
10397         if (not isinstance(row, dict) or
10398             "size" not in row or
10399             not isinstance(row["size"], int) or
10400             "mode" not in row or
10401             row["mode"] not in ['r', 'w']):
10402           raise errors.OpPrereqError("Invalid contents of the 'disks'"
10403                                      " parameter", errors.ECODE_INVAL)
10404       if self.op.hypervisor is None:
10405         self.op.hypervisor = self.cfg.GetHypervisorType()
10406     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10407       fname = _ExpandInstanceName(self.cfg, self.op.name)
10408       self.op.name = fname
10409       self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
10410     elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10411       if not hasattr(self.op, "evac_nodes"):
10412         raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
10413                                    " opcode input", errors.ECODE_INVAL)
10414     else:
10415       raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
10416                                  self.op.mode, errors.ECODE_INVAL)
10417
10418     if self.op.direction == constants.IALLOCATOR_DIR_OUT:
10419       if self.op.allocator is None:
10420         raise errors.OpPrereqError("Missing allocator name",
10421                                    errors.ECODE_INVAL)
10422     elif self.op.direction != constants.IALLOCATOR_DIR_IN:
10423       raise errors.OpPrereqError("Wrong allocator test '%s'" %
10424                                  self.op.direction, errors.ECODE_INVAL)
10425
10426   def Exec(self, feedback_fn):
10427     """Run the allocator test.
10428
10429     """
10430     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10431       ial = IAllocator(self.cfg, self.rpc,
10432                        mode=self.op.mode,
10433                        name=self.op.name,
10434                        mem_size=self.op.mem_size,
10435                        disks=self.op.disks,
10436                        disk_template=self.op.disk_template,
10437                        os=self.op.os,
10438                        tags=self.op.tags,
10439                        nics=self.op.nics,
10440                        vcpus=self.op.vcpus,
10441                        hypervisor=self.op.hypervisor,
10442                        )
10443     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10444       ial = IAllocator(self.cfg, self.rpc,
10445                        mode=self.op.mode,
10446                        name=self.op.name,
10447                        relocate_from=list(self.relocate_from),
10448                        )
10449     elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10450       ial = IAllocator(self.cfg, self.rpc,
10451                        mode=self.op.mode,
10452                        evac_nodes=self.op.evac_nodes)
10453     else:
10454       raise errors.ProgrammerError("Uncatched mode %s in"
10455                                    " LUTestAllocator.Exec", self.op.mode)
10456
10457     if self.op.direction == constants.IALLOCATOR_DIR_IN:
10458       result = ial.in_text
10459     else:
10460       ial.Run(self.op.allocator, validate=False)
10461       result = ial.out_text
10462     return result