4 # Copyright (C) 2006, 2007, 2008 Google Inc.
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
11 # This program is distributed in the hope that it will be useful, but
12 # WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 # General Public License for more details.
16 # You should have received a copy of the GNU General Public License
17 # along with this program; if not, write to the Free Software
18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
22 """Module implementing the master-side code."""
24 # pylint: disable-msg=W0201,C0302
26 # W0201 since most LU attributes are defined in CheckPrereq or similar
29 # C0302: since we have waaaay too many lines in this module
40 from ganeti import ssh
41 from ganeti import utils
42 from ganeti import errors
43 from ganeti import hypervisor
44 from ganeti import locking
45 from ganeti import constants
46 from ganeti import objects
47 from ganeti import serializer
48 from ganeti import ssconf
49 from ganeti import uidpool
50 from ganeti import compat
51 from ganeti import masterd
53 import ganeti.masterd.instance # pylint: disable-msg=W0611
56 # need to define these here before the actual LUs
59 """Returns an empty list.
66 """Returns an empty dict.
72 class LogicalUnit(object):
73 """Logical Unit base class.
75 Subclasses must follow these rules:
76 - implement ExpandNames
77 - implement CheckPrereq (except when tasklets are used)
78 - implement Exec (except when tasklets are used)
79 - implement BuildHooksEnv
80 - redefine HPATH and HTYPE
81 - optionally redefine their run requirements:
82 REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
84 Note that all commands require root permissions.
86 @ivar dry_run_result: the value (if any) that will be returned to the caller
87 in dry-run mode (signalled by opcode dry_run parameter)
88 @cvar _OP_DEFS: a list of opcode attributes and the default values
89 they should get if not already set
98 def __init__(self, processor, op, context, rpc):
99 """Constructor for LogicalUnit.
101 This needs to be overridden in derived classes in order to check op
105 self.proc = processor
107 self.cfg = context.cfg
108 self.context = context
110 # Dicts used to declare locking needs to mcpu
111 self.needed_locks = None
112 self.acquired_locks = {}
113 self.share_locks = dict.fromkeys(locking.LEVELS, 0)
115 self.remove_locks = {}
116 # Used to force good behavior when calling helper functions
117 self.recalculate_locks = {}
120 self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
121 self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
122 self.LogStep = processor.LogStep # pylint: disable-msg=C0103
123 # support for dry-run
124 self.dry_run_result = None
125 # support for generic debug attribute
126 if (not hasattr(self.op, "debug_level") or
127 not isinstance(self.op.debug_level, int)):
128 self.op.debug_level = 0
133 for aname, aval in self._OP_DEFS:
134 if not hasattr(self.op, aname):
139 setattr(self.op, aname, dval)
141 for attr_name in self._OP_REQP:
142 attr_val = getattr(op, attr_name, None)
144 raise errors.OpPrereqError("Required parameter '%s' missing" %
145 attr_name, errors.ECODE_INVAL)
147 self.CheckArguments()
150 """Returns the SshRunner object
154 self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
157 ssh = property(fget=__GetSSH)
159 def CheckArguments(self):
160 """Check syntactic validity for the opcode arguments.
162 This method is for doing a simple syntactic check and ensuring
163 validity of opcode parameters, without any cluster-related
164 checks. While the same can be accomplished in ExpandNames and/or
165 CheckPrereq, doing these separately is better because:
167 - ExpandNames is left as purely a lock-related function
168 - CheckPrereq is run after we have acquired locks (and possible
171 The function is allowed to change the self.op attribute so that
172 later methods no longer need to worry about missing parameters.
177 def ExpandNames(self):
178 """Expand names for this LU.
180 This method is called before starting to execute the opcode, and it should
181 update all the parameters of the opcode to their canonical form (e.g. a
182 short node name must be fully expanded after this method has successfully
183 completed). This way locking, hooks, logging, etc. can work correctly.
185 LUs which implement this method must also populate the self.needed_locks
186 member, as a dict with lock levels as keys, and a list of needed lock names
189 - use an empty dict if you don't need any lock
190 - if you don't need any lock at a particular level omit that level
191 - don't put anything for the BGL level
192 - if you want all locks at a level use locking.ALL_SET as a value
194 If you need to share locks (rather than acquire them exclusively) at one
195 level you can modify self.share_locks, setting a true value (usually 1) for
196 that level. By default locks are not shared.
198 This function can also define a list of tasklets, which then will be
199 executed in order instead of the usual LU-level CheckPrereq and Exec
200 functions, if those are not defined by the LU.
204 # Acquire all nodes and one instance
205 self.needed_locks = {
206 locking.LEVEL_NODE: locking.ALL_SET,
207 locking.LEVEL_INSTANCE: ['instance1.example.tld'],
209 # Acquire just two nodes
210 self.needed_locks = {
211 locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
214 self.needed_locks = {} # No, you can't leave it to the default value None
217 # The implementation of this method is mandatory only if the new LU is
218 # concurrent, so that old LUs don't need to be changed all at the same
221 self.needed_locks = {} # Exclusive LUs don't need locks.
223 raise NotImplementedError
225 def DeclareLocks(self, level):
226 """Declare LU locking needs for a level
228 While most LUs can just declare their locking needs at ExpandNames time,
229 sometimes there's the need to calculate some locks after having acquired
230 the ones before. This function is called just before acquiring locks at a
231 particular level, but after acquiring the ones at lower levels, and permits
232 such calculations. It can be used to modify self.needed_locks, and by
233 default it does nothing.
235 This function is only called if you have something already set in
236 self.needed_locks for the level.
238 @param level: Locking level which is going to be locked
239 @type level: member of ganeti.locking.LEVELS
243 def CheckPrereq(self):
244 """Check prerequisites for this LU.
246 This method should check that the prerequisites for the execution
247 of this LU are fulfilled. It can do internode communication, but
248 it should be idempotent - no cluster or system changes are
251 The method should raise errors.OpPrereqError in case something is
252 not fulfilled. Its return value is ignored.
254 This method should also update all the parameters of the opcode to
255 their canonical form if it hasn't been done by ExpandNames before.
258 if self.tasklets is not None:
259 for (idx, tl) in enumerate(self.tasklets):
260 logging.debug("Checking prerequisites for tasklet %s/%s",
261 idx + 1, len(self.tasklets))
264 raise NotImplementedError
266 def Exec(self, feedback_fn):
269 This method should implement the actual work. It should raise
270 errors.OpExecError for failures that are somewhat dealt with in
274 if self.tasklets is not None:
275 for (idx, tl) in enumerate(self.tasklets):
276 logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
279 raise NotImplementedError
281 def BuildHooksEnv(self):
282 """Build hooks environment for this LU.
284 This method should return a three-element tuple consisting of: a dict
285 containing the environment that will be used for running the
286 specific hook for this LU, a list of node names on which the hook
287 should run before the execution, and a list of node names on which
288 the hook should run after the execution.
290 The keys of the dict must not be prefixed with 'GANETI_', as this will
291 be handled by the hooks runner. Also note additional keys will be
292 added by the hooks runner. If the LU doesn't define any
293 environment, an empty dict (and not None) should be returned.
295 No nodes should be returned as an empty list (and not None).
297 Note that if the HPATH for a LU class is None, this function will
301 raise NotImplementedError
303 def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
304 """Notify the LU about the results of its hooks.
306 This method is called every time a hooks phase is executed, and notifies
307 the Logical Unit about the hooks' result. The LU can then use it to alter
308 its result based on the hooks. By default the method does nothing and the
309 previous result is passed back unchanged, but any LU can override it if it
310 wants to use the local cluster hook-scripts somehow.
312 @param phase: one of L{constants.HOOKS_PHASE_POST} or
313 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
314 @param hook_results: the results of the multi-node hooks rpc call
315 @param feedback_fn: function used to send feedback back to the caller
316 @param lu_result: the previous Exec result this LU had, or None
318 @return: the new Exec result, based on the previous result
322 # API must be kept, thus we ignore the 'unused argument' and 'could
323 # be a function' pylint warnings
324 # pylint: disable-msg=W0613,R0201
327 def _ExpandAndLockInstance(self):
328 """Helper function to expand and lock an instance.
330 Many LUs that work on an instance take its name in self.op.instance_name
331 and need to expand it and then declare the expanded name for locking. This
332 function does it, and then updates self.op.instance_name to the expanded
333 name. It also initializes needed_locks as a dict, if this hasn't been done
337 if self.needed_locks is None:
338 self.needed_locks = {}
340 assert locking.LEVEL_INSTANCE not in self.needed_locks, \
341 "_ExpandAndLockInstance called with instance-level locks set"
342 self.op.instance_name = _ExpandInstanceName(self.cfg,
343 self.op.instance_name)
344 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
346 def _LockInstancesNodes(self, primary_only=False):
347 """Helper function to declare instances' nodes for locking.
349 This function should be called after locking one or more instances to lock
350 their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
351 with all primary or secondary nodes for instances already locked and
352 present in self.needed_locks[locking.LEVEL_INSTANCE].
354 It should be called from DeclareLocks, and for safety only works if
355 self.recalculate_locks[locking.LEVEL_NODE] is set.
357 In the future it may grow parameters to lock only some instances' nodes, or
358 only primary or secondary nodes, if needed.
360 It should be called in DeclareLocks in a way similar to::
362 if level == locking.LEVEL_NODE:
363 self._LockInstancesNodes()
365 @type primary_only: boolean
366 @param primary_only: only lock primary nodes of locked instances
369 assert locking.LEVEL_NODE in self.recalculate_locks, \
370 "_LockInstancesNodes helper function called with no nodes to recalculate"
372 # TODO: check if we've really been called with the instance locks held
374 # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
375 # future we might want to have different behaviors depending on the value
376 # of self.recalculate_locks[locking.LEVEL_NODE]
378 for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
379 instance = self.context.cfg.GetInstanceInfo(instance_name)
380 wanted_nodes.append(instance.primary_node)
382 wanted_nodes.extend(instance.secondary_nodes)
384 if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
385 self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
386 elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
387 self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
389 del self.recalculate_locks[locking.LEVEL_NODE]
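# Illustrative sketch, not part of the original module: how an instance-level
# LU would typically combine the two helpers above (the LU name is made up).
#
#   class LUExampleInstanceOp(LogicalUnit):
#     def ExpandNames(self):
#       # expands self.op.instance_name and locks it at the instance level
#       self._ExpandAndLockInstance()
#       self.needed_locks[locking.LEVEL_NODE] = []
#       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
#
#     def DeclareLocks(self, level):
#       if level == locking.LEVEL_NODE:
#         self._LockInstancesNodes()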
392 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
393 """Simple LU which runs no hooks.
395 This LU is intended as a parent for other LogicalUnits which will
396 run no hooks, in order to reduce duplicate code.
402 def BuildHooksEnv(self):
403 """Empty BuildHooksEnv for NoHooksLu.
405 This just raises an error.
408 assert False, "BuildHooksEnv called for NoHooksLUs"
412 """Tasklet base class.
414 Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
415 they can mix legacy code with tasklets. Locking needs to be done in the LU;
416 tasklets know nothing about locks.
418 Subclasses must follow these rules:
419 - Implement CheckPrereq
423 def __init__(self, lu):
430 def CheckPrereq(self):
431 """Check prerequisites for this tasklets.
433 This method should check whether the prerequisites for the execution of
434 this tasklet are fulfilled. It can do internode communication, but it
435 should be idempotent - no cluster or system changes are allowed.
437 The method should raise errors.OpPrereqError in case something is not
438 fulfilled. Its return value is ignored.
440 This method should also update all parameters to their canonical form if it
441 hasn't been done before.
444 raise NotImplementedError
446 def Exec(self, feedback_fn):
447 """Execute the tasklet.
449 This method should implement the actual work. It should raise
450 errors.OpExecError for failures that are somewhat dealt with in code, or
454 raise NotImplementedError
457 def _GetWantedNodes(lu, nodes):
458 """Returns list of checked and expanded node names.
460 @type lu: L{LogicalUnit}
461 @param lu: the logical unit on whose behalf we execute
463 @param nodes: list of node names or None for all nodes
465 @return: the list of nodes, sorted
466 @raise errors.ProgrammerError: if the nodes parameter is wrong type
469 if not isinstance(nodes, list):
470 raise errors.OpPrereqError("Invalid argument type 'nodes'",
474 raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
475 " non-empty list of nodes whose name is to be expanded.")
477 wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
478 return utils.NiceSort(wanted)
481 def _GetWantedInstances(lu, instances):
482 """Returns list of checked and expanded instance names.
484 @type lu: L{LogicalUnit}
485 @param lu: the logical unit on whose behalf we execute
486 @type instances: list
487 @param instances: list of instance names or None for all instances
489 @return: the list of instances, sorted
490 @raise errors.OpPrereqError: if the instances parameter is wrong type
491 @raise errors.OpPrereqError: if any of the passed instances is not found
494 if not isinstance(instances, list):
495 raise errors.OpPrereqError("Invalid argument type 'instances'",
499 wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
501 wanted = utils.NiceSort(lu.cfg.GetInstanceList())
505 def _GetUpdatedParams(old_params, update_dict,
506 use_default=True, use_none=False):
507 """Return the new version of a parameter dictionary.
509 @type old_params: dict
510 @param old_params: old parameters
511 @type update_dict: dict
512 @param update_dict: dict containing new parameter values, or
513 constants.VALUE_DEFAULT to reset the parameter to its default
515 @type use_default: boolean
516 @param use_default: whether to recognise L{constants.VALUE_DEFAULT}
517 values as 'to be deleted' values
518 @type use_none: boolean
519 @param use_none: whether to recognise C{None} values as 'to be
522 @return: the new parameter dictionary
525 params_copy = copy.deepcopy(old_params)
526 for key, val in update_dict.iteritems():
527 if ((use_default and val == constants.VALUE_DEFAULT) or
528 (use_none and val is None)):
534 params_copy[key] = val
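# Worked example, illustrative only (assumes the elided branch above deletes
# keys whose value matches VALUE_DEFAULT/None, as the docstring describes):
#
#   old = {"memory": 128, "vcpus": 2}
#   upd = {"memory": constants.VALUE_DEFAULT, "vcpus": 4}
#   _GetUpdatedParams(old, upd)  ==>  {"vcpus": 4}
#
# i.e. "memory" falls back to its default by being removed from the
# per-object dict, while "vcpus" is simply overridden.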
538 def _CheckOutputFields(static, dynamic, selected):
539 """Checks whether all selected fields are valid.
541 @type static: L{utils.FieldSet}
542 @param static: static fields set
543 @type dynamic: L{utils.FieldSet}
544 @param dynamic: dynamic fields set
551 delta = f.NonMatching(selected)
553 raise errors.OpPrereqError("Unknown output fields selected: %s"
554 % ",".join(delta), errors.ECODE_INVAL)
557 def _CheckBooleanOpField(op, name):
558 """Validates boolean opcode parameters.
560 This will ensure that an opcode parameter is either a boolean value,
561 or None (but that it always exists).
564 val = getattr(op, name, None)
565 if not (val is None or isinstance(val, bool)):
566 raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
567 (name, str(val)), errors.ECODE_INVAL)
568 setattr(op, name, val)
571 def _CheckGlobalHvParams(params):
572 """Validates that given hypervisor params are not global ones.
574 This will ensure that instances don't get customised versions of
578 used_globals = constants.HVC_GLOBALS.intersection(params)
580 msg = ("The following hypervisor parameters are global and cannot"
581 " be customized at instance level, please modify them at"
582 " cluster level: %s" % utils.CommaJoin(used_globals))
583 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
586 def _CheckNodeOnline(lu, node):
587 """Ensure that a given node is online.
589 @param lu: the LU on behalf of which we make the check
590 @param node: the node to check
591 @raise errors.OpPrereqError: if the node is offline
594 if lu.cfg.GetNodeInfo(node).offline:
595 raise errors.OpPrereqError("Can't use offline node %s" % node,
599 def _CheckNodeNotDrained(lu, node):
600 """Ensure that a given node is not drained.
602 @param lu: the LU on behalf of which we make the check
603 @param node: the node to check
604 @raise errors.OpPrereqError: if the node is drained
607 if lu.cfg.GetNodeInfo(node).drained:
608 raise errors.OpPrereqError("Can't use drained node %s" % node,
612 def _CheckNodeHasOS(lu, node, os_name, force_variant):
613 """Ensure that a node supports a given OS.
615 @param lu: the LU on behalf of which we make the check
616 @param node: the node to check
617 @param os_name: the OS to query about
618 @param force_variant: whether to ignore variant errors
619 @raise errors.OpPrereqError: if the node does not support the OS
622 result = lu.rpc.call_os_get(node, os_name)
623 result.Raise("OS '%s' not in supported OS list for node %s" %
625 prereq=True, ecode=errors.ECODE_INVAL)
626 if not force_variant:
627 _CheckOSVariant(result.payload, os_name)
630 def _RequireFileStorage():
631 """Checks that file storage is enabled.
633 @raise errors.OpPrereqError: when file storage is disabled
636 if not constants.ENABLE_FILE_STORAGE:
637 raise errors.OpPrereqError("File storage disabled at configure time",
641 def _CheckDiskTemplate(template):
642 """Ensure a given disk template is valid.
645 if template not in constants.DISK_TEMPLATES:
646 msg = ("Invalid disk template name '%s', valid templates are: %s" %
647 (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
648 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
649 if template == constants.DT_FILE:
650 _RequireFileStorage()
653 def _CheckStorageType(storage_type):
654 """Ensure a given storage type is valid.
657 if storage_type not in constants.VALID_STORAGE_TYPES:
658 raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
660 if storage_type == constants.ST_FILE:
661 _RequireFileStorage()
664 def _GetClusterDomainSecret():
665 """Reads the cluster domain secret.
668 return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
672 def _CheckInstanceDown(lu, instance, reason):
673 """Ensure that an instance is not running."""
674 if instance.admin_up:
675 raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
676 (instance.name, reason), errors.ECODE_STATE)
678 pnode = instance.primary_node
679 ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
680 ins_l.Raise("Can't contact node %s for instance information" % pnode,
681 prereq=True, ecode=errors.ECODE_ENVIRON)
683 if instance.name in ins_l.payload:
684 raise errors.OpPrereqError("Instance %s is running, %s" %
685 (instance.name, reason), errors.ECODE_STATE)
688 def _ExpandItemName(fn, name, kind):
689 """Expand an item name.
691 @param fn: the function to use for expansion
692 @param name: requested item name
693 @param kind: text description ('Node' or 'Instance')
694 @return: the resolved (full) name
695 @raise errors.OpPrereqError: if the item is not found
699 if full_name is None:
700 raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
705 def _ExpandNodeName(cfg, name):
706 """Wrapper over L{_ExpandItemName} for nodes."""
707 return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
710 def _ExpandInstanceName(cfg, name):
711 """Wrapper over L{_ExpandItemName} for instance."""
712 return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
715 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
716 memory, vcpus, nics, disk_template, disks,
717 bep, hvp, hypervisor_name):
718 """Builds instance related env variables for hooks
720 This builds the hook environment from individual variables.
723 @param name: the name of the instance
724 @type primary_node: string
725 @param primary_node: the name of the instance's primary node
726 @type secondary_nodes: list
727 @param secondary_nodes: list of secondary nodes as strings
728 @type os_type: string
729 @param os_type: the name of the instance's OS
730 @type status: boolean
731 @param status: the should_run status of the instance
733 @param memory: the memory size of the instance
735 @param vcpus: the count of VCPUs the instance has
737 @param nics: list of tuples (ip, mac, mode, link) representing
738 the NICs the instance has
739 @type disk_template: string
740 @param disk_template: the disk template of the instance
742 @param disks: the list of (size, mode) pairs
744 @param bep: the backend parameters for the instance
746 @param hvp: the hypervisor parameters for the instance
747 @type hypervisor_name: string
748 @param hypervisor_name: the hypervisor for the instance
750 @return: the hook environment for this instance
759 "INSTANCE_NAME": name,
760 "INSTANCE_PRIMARY": primary_node,
761 "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
762 "INSTANCE_OS_TYPE": os_type,
763 "INSTANCE_STATUS": str_status,
764 "INSTANCE_MEMORY": memory,
765 "INSTANCE_VCPUS": vcpus,
766 "INSTANCE_DISK_TEMPLATE": disk_template,
767 "INSTANCE_HYPERVISOR": hypervisor_name,
771 nic_count = len(nics)
772 for idx, (ip, mac, mode, link) in enumerate(nics):
775 env["INSTANCE_NIC%d_IP" % idx] = ip
776 env["INSTANCE_NIC%d_MAC" % idx] = mac
777 env["INSTANCE_NIC%d_MODE" % idx] = mode
778 env["INSTANCE_NIC%d_LINK" % idx] = link
779 if mode == constants.NIC_MODE_BRIDGED:
780 env["INSTANCE_NIC%d_BRIDGE" % idx] = link
784 env["INSTANCE_NIC_COUNT"] = nic_count
787 disk_count = len(disks)
788 for idx, (size, mode) in enumerate(disks):
789 env["INSTANCE_DISK%d_SIZE" % idx] = size
790 env["INSTANCE_DISK%d_MODE" % idx] = mode
794 env["INSTANCE_DISK_COUNT"] = disk_count
796 for source, kind in [(bep, "BE"), (hvp, "HV")]:
797 for key, value in source.items():
798 env["INSTANCE_%s_%s" % (kind, key)] = value
803 def _NICListToTuple(lu, nics):
804 """Build a list of nic information tuples.
806 This list is suitable to be passed to _BuildInstanceHookEnv or as a return
807 value in LUQueryInstanceData.
809 @type lu: L{LogicalUnit}
810 @param lu: the logical unit on whose behalf we execute
811 @type nics: list of L{objects.NIC}
812 @param nics: list of nics to convert to hooks tuples
816 cluster = lu.cfg.GetClusterInfo()
820 filled_params = cluster.SimpleFillNIC(nic.nicparams)
821 mode = filled_params[constants.NIC_MODE]
822 link = filled_params[constants.NIC_LINK]
823 hooks_nics.append((ip, mac, mode, link))
827 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
828 """Builds instance related env variables for hooks from an object.
830 @type lu: L{LogicalUnit}
831 @param lu: the logical unit on whose behalf we execute
832 @type instance: L{objects.Instance}
833 @param instance: the instance for which we should build the
836 @param override: dictionary with key/values that will override
839 @return: the hook environment dictionary
842 cluster = lu.cfg.GetClusterInfo()
843 bep = cluster.FillBE(instance)
844 hvp = cluster.FillHV(instance)
846 'name': instance.name,
847 'primary_node': instance.primary_node,
848 'secondary_nodes': instance.secondary_nodes,
849 'os_type': instance.os,
850 'status': instance.admin_up,
851 'memory': bep[constants.BE_MEMORY],
852 'vcpus': bep[constants.BE_VCPUS],
853 'nics': _NICListToTuple(lu, instance.nics),
854 'disk_template': instance.disk_template,
855 'disks': [(disk.size, disk.mode) for disk in instance.disks],
858 'hypervisor_name': instance.hypervisor,
861 args.update(override)
862 return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
865 def _AdjustCandidatePool(lu, exceptions):
866 """Adjust the candidate pool after node operations.
869 mod_list = lu.cfg.MaintainCandidatePool(exceptions)
871 lu.LogInfo("Promoted nodes to master candidate role: %s",
872 utils.CommaJoin(node.name for node in mod_list))
873 for name in mod_list:
874 lu.context.ReaddNode(name)
875 mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
877 lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
881 def _DecideSelfPromotion(lu, exceptions=None):
882 """Decide whether I should promote myself as a master candidate.
885 cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
886 mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
887 # the new node will increase mc_max by one, so:
888 mc_should = min(mc_should + 1, cp_size)
889 return mc_now < mc_should
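# Worked example, illustrative only: with candidate_pool_size = 10, mc_now = 3
# and mc_should = 4, adding this node gives mc_should = min(4 + 1, 10) = 5,
# and since 3 < 5 the new node promotes itself to master candidate.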
892 def _CheckNicsBridgesExist(lu, target_nics, target_node):
893 """Check that the brigdes needed by a list of nics exist.
896 cluster = lu.cfg.GetClusterInfo()
897 paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
898 brlist = [params[constants.NIC_LINK] for params in paramslist
899 if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
901 result = lu.rpc.call_bridges_exist(target_node, brlist)
902 result.Raise("Error checking bridges on destination node '%s'" %
903 target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
906 def _CheckInstanceBridgesExist(lu, instance, node=None):
907 """Check that the brigdes needed by an instance exist.
911 node = instance.primary_node
912 _CheckNicsBridgesExist(lu, instance.nics, node)
915 def _CheckOSVariant(os_obj, name):
916 """Check whether an OS name conforms to the os variants specification.
918 @type os_obj: L{objects.OS}
919 @param os_obj: OS object to check
921 @param name: OS name passed by the user, to check for validity
924 if not os_obj.supported_variants:
927 variant = name.split("+", 1)[1]
929 raise errors.OpPrereqError("OS name must include a variant",
932 if variant not in os_obj.supported_variants:
933 raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
936 def _GetNodeInstancesInner(cfg, fn):
937 return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
940 def _GetNodeInstances(cfg, node_name):
941 """Returns a list of all primary and secondary instances on a node.
945 return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
948 def _GetNodePrimaryInstances(cfg, node_name):
949 """Returns primary instances on a node.
952 return _GetNodeInstancesInner(cfg,
953 lambda inst: node_name == inst.primary_node)
956 def _GetNodeSecondaryInstances(cfg, node_name):
957 """Returns secondary instances on a node.
960 return _GetNodeInstancesInner(cfg,
961 lambda inst: node_name in inst.secondary_nodes)
964 def _GetStorageTypeArgs(cfg, storage_type):
965 """Returns the arguments for a storage type.
968 # Special case for file storage
969 if storage_type == constants.ST_FILE:
970 # storage.FileStorage wants a list of storage directories
971 return [[cfg.GetFileStorageDir()]]
976 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
979 for dev in instance.disks:
980 cfg.SetDiskID(dev, node_name)
982 result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
983 result.Raise("Failed to get disk status from node %s" % node_name,
984 prereq=prereq, ecode=errors.ECODE_ENVIRON)
986 for idx, bdev_status in enumerate(result.payload):
987 if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
993 class LUPostInitCluster(LogicalUnit):
994 """Logical unit for running hooks after cluster initialization.
997 HPATH = "cluster-init"
998 HTYPE = constants.HTYPE_CLUSTER
1001 def BuildHooksEnv(self):
1005 env = {"OP_TARGET": self.cfg.GetClusterName()}
1006 mn = self.cfg.GetMasterNode()
1007 return env, [], [mn]
1009 def CheckPrereq(self):
1010 """No prerequisites to check.
1015 def Exec(self, feedback_fn):
1022 class LUDestroyCluster(LogicalUnit):
1023 """Logical unit for destroying the cluster.
1026 HPATH = "cluster-destroy"
1027 HTYPE = constants.HTYPE_CLUSTER
1030 def BuildHooksEnv(self):
1034 env = {"OP_TARGET": self.cfg.GetClusterName()}
1037 def CheckPrereq(self):
1038 """Check prerequisites.
1040 This checks whether the cluster is empty.
1042 Any errors are signaled by raising errors.OpPrereqError.
1045 master = self.cfg.GetMasterNode()
1047 nodelist = self.cfg.GetNodeList()
1048 if len(nodelist) != 1 or nodelist[0] != master:
1049 raise errors.OpPrereqError("There are still %d node(s) in"
1050 " this cluster." % (len(nodelist) - 1),
1052 instancelist = self.cfg.GetInstanceList()
1054 raise errors.OpPrereqError("There are still %d instance(s) in"
1055 " this cluster." % len(instancelist),
1058 def Exec(self, feedback_fn):
1059 """Destroys the cluster.
1062 master = self.cfg.GetMasterNode()
1063 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
1065 # Run post hooks on master node before it's removed
1066 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1068 hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1070 # pylint: disable-msg=W0702
1071 self.LogWarning("Errors occurred running hooks on %s" % master)
1073 result = self.rpc.call_node_stop_master(master, False)
1074 result.Raise("Could not disable the master role")
1076 if modify_ssh_setup:
1077 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
1078 utils.CreateBackup(priv_key)
1079 utils.CreateBackup(pub_key)
1084 def _VerifyCertificate(filename):
1085 """Verifies a certificate for LUVerifyCluster.
1087 @type filename: string
1088 @param filename: Path to PEM file
1092 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1093 utils.ReadFile(filename))
1094 except Exception, err: # pylint: disable-msg=W0703
1095 return (LUVerifyCluster.ETYPE_ERROR,
1096 "Failed to load X509 certificate %s: %s" % (filename, err))
1099 utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1100 constants.SSL_CERT_EXPIRATION_ERROR)
1103 fnamemsg = "While verifying %s: %s" % (filename, msg)
1108 return (None, fnamemsg)
1109 elif errcode == utils.CERT_WARNING:
1110 return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1111 elif errcode == utils.CERT_ERROR:
1112 return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1114 raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1117 class LUVerifyCluster(LogicalUnit):
1118 """Verifies the cluster status.
1121 HPATH = "cluster-verify"
1122 HTYPE = constants.HTYPE_CLUSTER
1123 _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
1126 TCLUSTER = "cluster"
1128 TINSTANCE = "instance"
1130 ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1131 ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1132 EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1133 EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1134 EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1135 EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1137 EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1138 ENODEDRBD = (TNODE, "ENODEDRBD")
1139 ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1140 ENODEHOOKS = (TNODE, "ENODEHOOKS")
1141 ENODEHV = (TNODE, "ENODEHV")
1142 ENODELVM = (TNODE, "ENODELVM")
1143 ENODEN1 = (TNODE, "ENODEN1")
1144 ENODENET = (TNODE, "ENODENET")
1145 ENODEOS = (TNODE, "ENODEOS")
1146 ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1147 ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1148 ENODERPC = (TNODE, "ENODERPC")
1149 ENODESSH = (TNODE, "ENODESSH")
1150 ENODEVERSION = (TNODE, "ENODEVERSION")
1151 ENODESETUP = (TNODE, "ENODESETUP")
1152 ENODETIME = (TNODE, "ENODETIME")
1154 ETYPE_FIELD = "code"
1155 ETYPE_ERROR = "ERROR"
1156 ETYPE_WARNING = "WARNING"
1158 class NodeImage(object):
1159 """A class representing the logical and physical status of a node.
1162 @ivar name: the node name to which this object refers
1163 @ivar volumes: a structure as returned from
1164 L{ganeti.backend.GetVolumeList} (runtime)
1165 @ivar instances: a list of running instances (runtime)
1166 @ivar pinst: list of configured primary instances (config)
1167 @ivar sinst: list of configured secondary instances (config)
1168 @ivar sbp: dictionary of {secondary-node: list of instances} of all peers
1169 of this node (config)
1170 @ivar mfree: free memory, as reported by hypervisor (runtime)
1171 @ivar dfree: free disk, as reported by the node (runtime)
1172 @ivar offline: the offline status (config)
1173 @type rpc_fail: boolean
1174 @ivar rpc_fail: whether the RPC verify call failed (overall,
1175 not whether the individual keys were correct) (runtime)
1176 @type lvm_fail: boolean
1177 @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1178 @type hyp_fail: boolean
1179 @ivar hyp_fail: whether the RPC call didn't return the instance list
1180 @type ghost: boolean
1181 @ivar ghost: whether this is a ghost node, i.e. not known to the configuration (config)
1182 @type os_fail: boolean
1183 @ivar os_fail: whether the RPC call didn't return valid OS data
1185 @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1188 def __init__(self, offline=False, name=None):
1197 self.offline = offline
1198 self.rpc_fail = False
1199 self.lvm_fail = False
1200 self.hyp_fail = False
1202 self.os_fail = False
1205 def ExpandNames(self):
1206 self.needed_locks = {
1207 locking.LEVEL_NODE: locking.ALL_SET,
1208 locking.LEVEL_INSTANCE: locking.ALL_SET,
1210 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1212 def _Error(self, ecode, item, msg, *args, **kwargs):
1213 """Format an error message.
1215 Based on the opcode's error_codes parameter, either format a
1216 parseable error code, or a simpler error string.
1218 This must be called only from Exec and functions called from Exec.
1221 ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1223 # first complete the msg
1226 # then format the whole message
1227 if self.op.error_codes:
1228 msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1234 msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1235 # and finally report it via the feedback_fn
1236 self._feedback_fn(" - %s" % msg)
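# Illustrative sketch, not part of the original module: with error_codes set
# the line passed to feedback_fn looks like
#
#    - ERROR:ENODELVM:node:node1.example.com:unable to check volume groups
#
# while the default (non-parseable) format produced above is
#
#    - ERROR: node node1.example.com: unable to check volume groups
#
# (item spacing in the second form comes from the elided item handling).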
1238 def _ErrorIf(self, cond, *args, **kwargs):
1239 """Log an error message if the passed condition is True.
1242 cond = bool(cond) or self.op.debug_simulate_errors
1244 self._Error(*args, **kwargs)
1245 # do not mark the operation as failed for WARN cases only
1246 if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1247 self.bad = self.bad or cond
1249 def _VerifyNode(self, ninfo, nresult):
1250 """Run multiple tests against a node.
1254 - compares ganeti version
1255 - checks vg existence and size > 20G
1256 - checks config file checksum
1257 - checks ssh to other nodes
1259 @type ninfo: L{objects.Node}
1260 @param ninfo: the node to check
1261 @param nresult: the results from the node
1263 @return: whether overall this call was successful (and we can expect
1264 reasonable values in the response)
1268 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1270 # main result, nresult should be a non-empty dict
1271 test = not nresult or not isinstance(nresult, dict)
1272 _ErrorIf(test, self.ENODERPC, node,
1273 "unable to verify node: no data returned")
1277 # compares ganeti version
1278 local_version = constants.PROTOCOL_VERSION
1279 remote_version = nresult.get("version", None)
1280 test = not (remote_version and
1281 isinstance(remote_version, (list, tuple)) and
1282 len(remote_version) == 2)
1283 _ErrorIf(test, self.ENODERPC, node,
1284 "connection to node returned invalid data")
1288 test = local_version != remote_version[0]
1289 _ErrorIf(test, self.ENODEVERSION, node,
1290 "incompatible protocol versions: master %s,"
1291 " node %s", local_version, remote_version[0])
1295 # node seems compatible, we can actually try to look into its results
1297 # full package version
1298 self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1299 self.ENODEVERSION, node,
1300 "software version mismatch: master %s, node %s",
1301 constants.RELEASE_VERSION, remote_version[1],
1302 code=self.ETYPE_WARNING)
1304 hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1305 if isinstance(hyp_result, dict):
1306 for hv_name, hv_result in hyp_result.iteritems():
1307 test = hv_result is not None
1308 _ErrorIf(test, self.ENODEHV, node,
1309 "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1312 test = nresult.get(constants.NV_NODESETUP,
1313 ["Missing NODESETUP results"])
1314 _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1319 def _VerifyNodeTime(self, ninfo, nresult,
1320 nvinfo_starttime, nvinfo_endtime):
1321 """Check the node time.
1323 @type ninfo: L{objects.Node}
1324 @param ninfo: the node to check
1325 @param nresult: the remote results for the node
1326 @param nvinfo_starttime: the start time of the RPC call
1327 @param nvinfo_endtime: the end time of the RPC call
1331 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1333 ntime = nresult.get(constants.NV_TIME, None)
1335 ntime_merged = utils.MergeTime(ntime)
1336 except (ValueError, TypeError):
1337 _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1340 if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1341 ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1342 elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1343 ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1347 _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1348 "Node time diverges by at least %s from master node time",
1351 def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1352 """Check the node time.
1354 @type ninfo: L{objects.Node}
1355 @param ninfo: the node to check
1356 @param nresult: the remote results for the node
1357 @param vg_name: the configured VG name
1364 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1366 # checks vg existence and size > 20G
1367 vglist = nresult.get(constants.NV_VGLIST, None)
1369 _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1371 vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1372 constants.MIN_VG_SIZE)
1373 _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1376 pvlist = nresult.get(constants.NV_PVLIST, None)
1377 test = pvlist is None
1378 _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1380 # check that ':' is not present in PV names, since it's a
1381 # special character for lvcreate (denotes the range of PEs to
1383 for _, pvname, owner_vg in pvlist:
1384 test = ":" in pvname
1385 _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1386 " '%s' of VG '%s'", pvname, owner_vg)
1388 def _VerifyNodeNetwork(self, ninfo, nresult):
1389 """Check the node time.
1391 @type ninfo: L{objects.Node}
1392 @param ninfo: the node to check
1393 @param nresult: the remote results for the node
1397 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1399 test = constants.NV_NODELIST not in nresult
1400 _ErrorIf(test, self.ENODESSH, node,
1401 "node hasn't returned node ssh connectivity data")
1403 if nresult[constants.NV_NODELIST]:
1404 for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1405 _ErrorIf(True, self.ENODESSH, node,
1406 "ssh communication with node '%s': %s", a_node, a_msg)
1408 test = constants.NV_NODENETTEST not in nresult
1409 _ErrorIf(test, self.ENODENET, node,
1410 "node hasn't returned node tcp connectivity data")
1412 if nresult[constants.NV_NODENETTEST]:
1413 nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1415 _ErrorIf(True, self.ENODENET, node,
1416 "tcp communication with node '%s': %s",
1417 anode, nresult[constants.NV_NODENETTEST][anode])
1419 test = constants.NV_MASTERIP not in nresult
1420 _ErrorIf(test, self.ENODENET, node,
1421 "node hasn't returned node master IP reachability data")
1423 if not nresult[constants.NV_MASTERIP]:
1424 if node == self.master_node:
1425 msg = "the master node cannot reach the master IP (not configured?)"
1427 msg = "cannot reach the master IP"
1428 _ErrorIf(True, self.ENODENET, node, msg)
1431 def _VerifyInstance(self, instance, instanceconfig, node_image):
1432 """Verify an instance.
1434 This function checks to see if the required block devices are
1435 available on the instance's node.
1438 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1439 node_current = instanceconfig.primary_node
1441 node_vol_should = {}
1442 instanceconfig.MapLVsByNode(node_vol_should)
1444 for node in node_vol_should:
1445 n_img = node_image[node]
1446 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1447 # ignore missing volumes on offline or broken nodes
1449 for volume in node_vol_should[node]:
1450 test = volume not in n_img.volumes
1451 _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1452 "volume %s missing on node %s", volume, node)
1454 if instanceconfig.admin_up:
1455 pri_img = node_image[node_current]
1456 test = instance not in pri_img.instances and not pri_img.offline
1457 _ErrorIf(test, self.EINSTANCEDOWN, instance,
1458 "instance not running on its primary node %s",
1461 for node, n_img in node_image.items():
1462 if node != node_current:
1463 test = instance in n_img.instances
1464 _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1465 "instance should not run on node %s", node)
1467 def _VerifyOrphanVolumes(self, node_vol_should, node_image):
1468 """Verify if there are any unknown volumes in the cluster.
1470 The .os, .swap and backup volumes are ignored. All other volumes are
1471 reported as unknown.
1474 for node, n_img in node_image.items():
1475 if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1476 # skip non-healthy nodes
1478 for volume in n_img.volumes:
1479 test = (node not in node_vol_should or
1480 volume not in node_vol_should[node])
1481 self._ErrorIf(test, self.ENODEORPHANLV, node,
1482 "volume %s is unknown", volume)
1484 def _VerifyOrphanInstances(self, instancelist, node_image):
1485 """Verify the list of running instances.
1487 This checks what instances are running but unknown to the cluster.
1490 for node, n_img in node_image.items():
1491 for o_inst in n_img.instances:
1492 test = o_inst not in instancelist
1493 self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1494 "instance %s on node %s should not exist", o_inst, node)
1496 def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1497 """Verify N+1 Memory Resilience.
1499 Check that if a single node dies we can still start all the
1500 instances it was primary for.
1503 for node, n_img in node_image.items():
1504 # This code checks that every node which is now listed as
1505 # secondary has enough memory to host all instances it is
1506 # supposed to should a single other node in the cluster fail.
1507 # FIXME: not ready for failover to an arbitrary node
1508 # FIXME: does not support file-backed instances
1509 # WARNING: we currently take into account down instances as well
1510 # as up ones, considering that even if they're down someone
1511 # might want to start them even in the event of a node failure.
1512 for prinode, instances in n_img.sbp.items():
1514 for instance in instances:
1515 bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1516 if bep[constants.BE_AUTO_BALANCE]:
1517 needed_mem += bep[constants.BE_MEMORY]
1518 test = n_img.mfree < needed_mem
1519 self._ErrorIf(test, self.ENODEN1, node,
1520 "not enough memory on to accommodate"
1521 " failovers should peer node %s fail", prinode)
1523 def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1525 """Verifies and computes the node required file checksums.
1527 @type ninfo: L{objects.Node}
1528 @param ninfo: the node to check
1529 @param nresult: the remote results for the node
1530 @param file_list: required list of files
1531 @param local_cksum: dictionary of local files and their checksums
1532 @param master_files: list of files that only masters should have
1536 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1538 remote_cksum = nresult.get(constants.NV_FILELIST, None)
1539 test = not isinstance(remote_cksum, dict)
1540 _ErrorIf(test, self.ENODEFILECHECK, node,
1541 "node hasn't returned file checksum data")
1545 for file_name in file_list:
1546 node_is_mc = ninfo.master_candidate
1547 must_have = (file_name not in master_files) or node_is_mc
1549 test1 = file_name not in remote_cksum
1551 test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1553 test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1554 _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1555 "file '%s' missing", file_name)
1556 _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1557 "file '%s' has wrong checksum", file_name)
1558 # not candidate and this is not a must-have file
1559 _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1560 "file '%s' should not exist on non master"
1561 " candidates (and the file is outdated)", file_name)
1562 # all good, except non-master/non-must have combination
1563 _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1564 "file '%s' should not exist"
1565 " on non master candidates", file_name)
1567 def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
1568 """Verifies and the node DRBD status.
1570 @type ninfo: L{objects.Node}
1571 @param ninfo: the node to check
1572 @param nresult: the remote results for the node
1573 @param instanceinfo: the dict of instances
1574 @param drbd_map: the DRBD map as returned by
1575 L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1579 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1581 # compute the DRBD minors
1583 for minor, instance in drbd_map[node].items():
1584 test = instance not in instanceinfo
1585 _ErrorIf(test, self.ECLUSTERCFG, None,
1586 "ghost instance '%s' in temporary DRBD map", instance)
1587 # ghost instance should not be running, but otherwise we
1588 # don't give double warnings (both ghost instance and
1589 # unallocated minor in use)
1591 node_drbd[minor] = (instance, False)
1593 instance = instanceinfo[instance]
1594 node_drbd[minor] = (instance.name, instance.admin_up)
1596 # and now check them
1597 used_minors = nresult.get(constants.NV_DRBDLIST, [])
1598 test = not isinstance(used_minors, (tuple, list))
1599 _ErrorIf(test, self.ENODEDRBD, node,
1600 "cannot parse drbd status file: %s", str(used_minors))
1602 # we cannot check drbd status
1605 for minor, (iname, must_exist) in node_drbd.items():
1606 test = minor not in used_minors and must_exist
1607 _ErrorIf(test, self.ENODEDRBD, node,
1608 "drbd minor %d of instance %s is not active", minor, iname)
1609 for minor in used_minors:
1610 test = minor not in node_drbd
1611 _ErrorIf(test, self.ENODEDRBD, node,
1612 "unallocated drbd minor %d is in use", minor)
1614 def _UpdateNodeOS(self, ninfo, nresult, nimg):
1615 """Builds the node OS structures.
1617 @type ninfo: L{objects.Node}
1618 @param ninfo: the node to check
1619 @param nresult: the remote results for the node
1620 @param nimg: the node image object
1624 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1626 remote_os = nresult.get(constants.NV_OSLIST, None)
1627 test = (not isinstance(remote_os, list) or
1628 not compat.all(remote_os,
1629 lambda v: isinstance(v, list) and len(v) == 7))
1631 _ErrorIf(test, self.ENODEOS, node,
1632 "node hasn't returned valid OS data")
1641 for (name, os_path, status, diagnose,
1642 variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1644 if name not in os_dict:
1647 # parameters is a list of lists instead of list of tuples due to
1648 # JSON lacking a real tuple type, fix it:
1649 parameters = [tuple(v) for v in parameters]
1650 os_dict[name].append((os_path, status, diagnose,
1651 set(variants), set(parameters), set(api_ver)))
1653 nimg.oslist = os_dict
1655 def _VerifyNodeOS(self, ninfo, nimg, base):
1656 """Verifies the node OS list.
1658 @type ninfo: L{objects.Node}
1659 @param ninfo: the node to check
1660 @param nimg: the node image object
1661 @param base: the 'template' node we match against (e.g. from the master)
1665 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1667 assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1669 for os_name, os_data in nimg.oslist.items():
1670 assert os_data, "Empty OS status for OS %s?!" % os_name
1671 f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1672 _ErrorIf(not f_status, self.ENODEOS, node,
1673 "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1674 _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1675 "OS '%s' has multiple entries (first one shadows the rest): %s",
1676 os_name, utils.CommaJoin([v[0] for v in os_data]))
1677 # this will be caught in the backend too
1678 _ErrorIf(compat.any(f_api, lambda v: v >= constants.OS_API_V15)
1679 and not f_var, self.ENODEOS, node,
1680 "OS %s with API at least %d does not declare any variant",
1681 os_name, constants.OS_API_V15)
1682 # comparisons with the 'base' image
1683 test = os_name not in base.oslist
1684 _ErrorIf(test, self.ENODEOS, node,
1685 "Extra OS %s not present on reference node (%s)",
1689 assert base.oslist[os_name], "Base node has empty OS status?"
1690 _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1692 # base OS is invalid, skipping
1694 for kind, a, b in [("API version", f_api, b_api),
1695 ("variants list", f_var, b_var),
1696 ("parameters", f_param, b_param)]:
1697 _ErrorIf(a != b, self.ENODEOS, node,
1698 "OS %s %s differs from reference node %s: %s vs. %s",
1699 kind, os_name, base.name,
1700 utils.CommaJoin(a), utils.CommaJoin(b))
1702 # check any missing OSes
1703 missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1704 _ErrorIf(missing, self.ENODEOS, node,
1705 "OSes present on reference node %s but missing on this node: %s",
1706 base.name, utils.CommaJoin(missing))
1708 def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1709 """Verifies and updates the node volume data.
1711 This function will update a L{NodeImage}'s internal structures
1712 with data from the remote call.
1714 @type ninfo: L{objects.Node}
1715 @param ninfo: the node to check
1716 @param nresult: the remote results for the node
1717 @param nimg: the node image object
1718 @param vg_name: the configured VG name
1722 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1724 nimg.lvm_fail = True
1725 lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1728 elif isinstance(lvdata, basestring):
1729 _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1730 utils.SafeEncode(lvdata))
1731 elif not isinstance(lvdata, dict):
1732 _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1734 nimg.volumes = lvdata
1735 nimg.lvm_fail = False
1737 def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1738 """Verifies and updates the node instance list.
1740 If the listing was successful, then updates this node's instance
1741 list. Otherwise, it marks the RPC call as failed for the instance
1744 @type ninfo: L{objects.Node}
1745 @param ninfo: the node to check
1746 @param nresult: the remote results for the node
1747 @param nimg: the node image object
1750 idata = nresult.get(constants.NV_INSTANCELIST, None)
1751 test = not isinstance(idata, list)
1752 self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1753 " (instancelist): %s", utils.SafeEncode(str(idata)))
1755 nimg.hyp_fail = True
1757 nimg.instances = idata
1759 def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1760 """Verifies and computes a node information map
1762 @type ninfo: L{objects.Node}
1763 @param ninfo: the node to check
1764 @param nresult: the remote results for the node
1765 @param nimg: the node image object
1766 @param vg_name: the configured VG name
1770 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1772 # try to read free memory (from the hypervisor)
1773 hv_info = nresult.get(constants.NV_HVINFO, None)
1774 test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1775 _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1778 nimg.mfree = int(hv_info["memory_free"])
1779 except (ValueError, TypeError):
1780 _ErrorIf(True, self.ENODERPC, node,
1781 "node returned invalid nodeinfo, check hypervisor")
1783 # FIXME: devise a free space model for file based instances as well
1784 if vg_name is not None:
1785 test = (constants.NV_VGLIST not in nresult or
1786 vg_name not in nresult[constants.NV_VGLIST])
1787 _ErrorIf(test, self.ENODELVM, node,
1788 "node didn't return data for the volume group '%s'"
1789 " - it is either missing or broken", vg_name)
1792 nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1793 except (ValueError, TypeError):
1794 _ErrorIf(True, self.ENODERPC, node,
1795 "node returned invalid LVM info, check LVM status")
1797 def CheckPrereq(self):
1798 """Check prerequisites.
1800 Transform the list of checks we're going to skip into a set and check that
1801 all its members are valid.
1804 self.skip_set = frozenset(self.op.skip_checks)
1805 if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
1806 raise errors.OpPrereqError("Invalid checks to be skipped specified",
1809 def BuildHooksEnv(self):
1812 Cluster-Verify hooks just run in the post phase; if they fail, their
1813 output is logged in the verify output and the verification fails.
1816 all_nodes = self.cfg.GetNodeList()
1818 "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1820 for node in self.cfg.GetAllNodesInfo().values():
1821 env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1823 return env, [], all_nodes
1825 def Exec(self, feedback_fn):
1826 """Verify integrity of cluster, performing various test on nodes.
1830 _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1831 verbose = self.op.verbose
1832 self._feedback_fn = feedback_fn
1833 feedback_fn("* Verifying global settings")
1834 for msg in self.cfg.VerifyConfig():
1835 _ErrorIf(True, self.ECLUSTERCFG, None, msg)
1837 # Check the cluster certificates
1838 for cert_filename in constants.ALL_CERT_FILES:
1839 (errcode, msg) = _VerifyCertificate(cert_filename)
1840 _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1842 vg_name = self.cfg.GetVGName()
1843 hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1844 cluster = self.cfg.GetClusterInfo()
1845 nodelist = utils.NiceSort(self.cfg.GetNodeList())
1846 nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1847 instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1848 instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1849 for iname in instancelist)
1850 i_non_redundant = [] # Non redundant instances
1851 i_non_a_balanced = [] # Non auto-balanced instances
1852 n_offline = 0 # Count of offline nodes
1853 n_drained = 0 # Count of nodes being drained
1854 node_vol_should = {}
1856 # FIXME: verify OS list
1857 # do local checksums
1858 master_files = [constants.CLUSTER_CONF_FILE]
1859 master_node = self.master_node = self.cfg.GetMasterNode()
1860 master_ip = self.cfg.GetMasterIP()
1862 file_names = ssconf.SimpleStore().GetFileList()
1863 file_names.extend(constants.ALL_CERT_FILES)
1864 file_names.extend(master_files)
1865 if cluster.modify_etc_hosts:
1866 file_names.append(constants.ETC_HOSTS)
1868 local_checksums = utils.FingerprintFiles(file_names)
1870 feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1871 node_verify_param = {
1872 constants.NV_FILELIST: file_names,
1873 constants.NV_NODELIST: [node.name for node in nodeinfo
1874 if not node.offline],
1875 constants.NV_HYPERVISOR: hypervisors,
1876 constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1877 node.secondary_ip) for node in nodeinfo
1878 if not node.offline],
1879 constants.NV_INSTANCELIST: hypervisors,
1880 constants.NV_VERSION: None,
1881 constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1882 constants.NV_NODESETUP: None,
1883 constants.NV_TIME: None,
1884 constants.NV_MASTERIP: (master_node, master_ip),
1885 constants.NV_OSLIST: None,
1888 if vg_name is not None:
1889 node_verify_param[constants.NV_VGLIST] = None
1890 node_verify_param[constants.NV_LVLIST] = vg_name
1891 node_verify_param[constants.NV_PVLIST] = [vg_name]
1892 node_verify_param[constants.NV_DRBDLIST] = None
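# (informational note) each NV_* key above selects a check that the node
# daemon runs via the node_verify RPC; the value carries that check's
# parameters (e.g. the files to fingerprint for NV_FILELIST, or the
# (name, primary_ip, secondary_ip) tuples for NV_NODENETTEST)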
1894 # Build our expected cluster state
1895 node_image = dict((node.name, self.NodeImage(offline=node.offline,
1897 for node in nodeinfo)
1899 for instance in instancelist:
1900 inst_config = instanceinfo[instance]
1902 for nname in inst_config.all_nodes:
1903 if nname not in node_image:
1904 # ghost node
1905 gnode = self.NodeImage(name=nname)
1906 gnode.ghost = True
1907 node_image[nname] = gnode
1909 inst_config.MapLVsByNode(node_vol_should)
1911 pnode = inst_config.primary_node
1912 node_image[pnode].pinst.append(instance)
1914 for snode in inst_config.secondary_nodes:
1915 nimg = node_image[snode]
1916 nimg.sinst.append(instance)
1917 if pnode not in nimg.sbp:
1918 nimg.sbp[pnode] = []
1919 nimg.sbp[pnode].append(instance)
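# (informational note) nimg.sbp maps a primary node name to the list of
# instances having that node as primary and this node as secondary
# ("secondaries by primary"); the N+1 memory check further down relies
# on this mapping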
1921 # At this point, we have the in-memory data structures complete,
1922 # except for the runtime information, which we'll gather next
1924 # Due to the way our RPC system works, exact response times cannot be
1925 # guaranteed (e.g. a broken node could run into a timeout). By keeping the
1926 # time before and after executing the request, we can at least have a time
1927 # window.
1928 nvinfo_starttime = time.time()
1929 all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
1930 self.cfg.GetClusterName())
1931 nvinfo_endtime = time.time()
1933 all_drbd_map = self.cfg.ComputeDRBDMap()
1935 feedback_fn("* Verifying node status")
1939 for node_i in nodeinfo:
1941 nimg = node_image[node]
1945 feedback_fn("* Skipping offline node %s" % (node,))
1949 if node == master_node:
1951 elif node_i.master_candidate:
1952 ntype = "master candidate"
1953 elif node_i.drained:
1959 feedback_fn("* Verifying node %s (%s)" % (node, ntype))
1961 msg = all_nvinfo[node].fail_msg
1962 _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
1964 nimg.rpc_fail = True
1967 nresult = all_nvinfo[node].payload
1969 nimg.call_ok = self._VerifyNode(node_i, nresult)
1970 self._VerifyNodeNetwork(node_i, nresult)
1971 self._VerifyNodeLVM(node_i, nresult, vg_name)
1972 self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
1974 self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
1975 self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
1977 self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
1978 self._UpdateNodeInstances(node_i, nresult, nimg)
1979 self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
1980 self._UpdateNodeOS(node_i, nresult, nimg)
1981 if not nimg.os_fail:
1982 if refos_img is None:
1984 self._VerifyNodeOS(node_i, nimg, refos_img)
1986 feedback_fn("* Verifying instance status")
1987 for instance in instancelist:
1989 feedback_fn("* Verifying instance %s" % instance)
1990 inst_config = instanceinfo[instance]
1991 self._VerifyInstance(instance, inst_config, node_image)
1992 inst_nodes_offline = []
1994 pnode = inst_config.primary_node
1995 pnode_img = node_image[pnode]
1996 _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
1997 self.ENODERPC, pnode, "instance %s, connection to"
1998 " primary node failed", instance)
2000 if pnode_img.offline:
2001 inst_nodes_offline.append(pnode)
2003 # If the instance is non-redundant we cannot survive losing its primary
2004 # node, so we are not N+1 compliant. On the other hand we have no disk
2005 # templates with more than one secondary so that situation is not well
2006 # supported either.
2007 # FIXME: does not support file-backed instances
2008 if not inst_config.secondary_nodes:
2009 i_non_redundant.append(instance)
2010 _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2011 instance, "instance has multiple secondary nodes: %s",
2012 utils.CommaJoin(inst_config.secondary_nodes),
2013 code=self.ETYPE_WARNING)
2015 if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2016 i_non_a_balanced.append(instance)
2018 for snode in inst_config.secondary_nodes:
2019 s_img = node_image[snode]
2020 _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2021 "instance %s, connection to secondary node failed", instance)
2024 inst_nodes_offline.append(snode)
2026 # warn that the instance lives on offline nodes
2027 _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2028 "instance lives on offline node(s) %s",
2029 utils.CommaJoin(inst_nodes_offline))
2030 # ... or ghost nodes
2031 for node in inst_config.all_nodes:
2032 _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2033 "instance lives on ghost node %s", node)
2035 feedback_fn("* Verifying orphan volumes")
2036 self._VerifyOrphanVolumes(node_vol_should, node_image)
2038 feedback_fn("* Verifying orphan instances")
2039 self._VerifyOrphanInstances(instancelist, node_image)
2041 if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
2042 feedback_fn("* Verifying N+1 Memory redundancy")
2043 self._VerifyNPlusOneMemory(node_image, instanceinfo)
2045 feedback_fn("* Other Notes")
2047 feedback_fn(" - NOTICE: %d non-redundant instance(s) found."
2048 % len(i_non_redundant))
2050 if i_non_a_balanced:
2051 feedback_fn(" - NOTICE: %d non-auto-balanced instance(s) found."
2052 % len(i_non_a_balanced))
2055 feedback_fn(" - NOTICE: %d offline node(s) found." % n_offline)
2058 feedback_fn(" - NOTICE: %d drained node(s) found." % n_drained)
2062 def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2063 """Analyze the post-hooks' result
2065 This method analyses the hook result, handles it, and sends some
2066 nicely-formatted feedback back to the user.
2068 @param phase: one of L{constants.HOOKS_PHASE_POST} or
2069 L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2070 @param hooks_results: the results of the multi-node hooks rpc call
2071 @param feedback_fn: function used to send feedback back to the caller
2072 @param lu_result: previous Exec result
2073 @return: the new Exec result, based on the previous result
2077 # We only really run POST phase hooks, and are only interested in
2078 # their results
2079 if phase == constants.HOOKS_PHASE_POST:
2080 # Used to change hooks' output to proper indentation
2081 indent_re = re.compile('^', re.M)
2082 feedback_fn("* Hooks Results")
2083 assert hooks_results, "invalid result from hooks"
2085 for node_name in hooks_results:
2086 res = hooks_results[node_name]
2088 test = msg and not res.offline
2089 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2090 "Communication failure in hooks execution: %s", msg)
2091 if res.offline or msg:
2092 # No need to investigate payload if node is offline or gave an error.
2093 # override manually lu_result here as _ErrorIf only
2094 # overrides self.bad
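# (informational note) each res.payload entry is expected to be a
# (script, status, output) tuple, with status being one of the
# constants.HKR_* values tested below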
2097 for script, hkr, output in res.payload:
2098 test = hkr == constants.HKR_FAIL
2099 self._ErrorIf(test, self.ENODEHOOKS, node_name,
2100 "Script %s failed, output:", script)
2102 output = indent_re.sub(' ', output)
2103 feedback_fn("%s" % output)
2109 class LUVerifyDisks(NoHooksLU):
2110 """Verifies the cluster disks status.
2116 def ExpandNames(self):
2117 self.needed_locks = {
2118 locking.LEVEL_NODE: locking.ALL_SET,
2119 locking.LEVEL_INSTANCE: locking.ALL_SET,
2121 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2123 def CheckPrereq(self):
2124 """Check prerequisites.
2126 This has no prerequisites.
2131 def Exec(self, feedback_fn):
2132 """Verify integrity of cluster disks.
2134 @rtype: tuple of three items
2135 @return: a tuple of (dict of node-to-node_error, list of instances
2136 which need activate-disks, dict of instance: (node, volume) for
2137 missing volumes)
2140 result = res_nodes, res_instances, res_missing = {}, [], {}
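# purely illustrative shape of the result, with made-up node/instance
# names:
#   ({"node2": "rpc failure"},    # nodes that could not be queried
#    ["instance3"],               # instances needing activate-disks
#    {"instance5": [("node1", "lv-name")]})  # instances with missing LVs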
2142 vg_name = self.cfg.GetVGName()
2143 nodes = utils.NiceSort(self.cfg.GetNodeList())
2144 instances = [self.cfg.GetInstanceInfo(name)
2145 for name in self.cfg.GetInstanceList()]
2148 for inst in instances:
2150 if (not inst.admin_up or
2151 inst.disk_template not in constants.DTS_NET_MIRROR):
2153 inst.MapLVsByNode(inst_lvs)
2154 # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2155 for node, vol_list in inst_lvs.iteritems():
2156 for vol in vol_list:
2157 nv_dict[(node, vol)] = inst
2162 node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2166 node_res = node_lvs[node]
2167 if node_res.offline:
2169 msg = node_res.fail_msg
2171 logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2172 res_nodes[node] = msg
2175 lvs = node_res.payload
2176 for lv_name, (_, _, lv_online) in lvs.items():
2177 inst = nv_dict.pop((node, lv_name), None)
2178 if (not lv_online and inst is not None
2179 and inst.name not in res_instances):
2180 res_instances.append(inst.name)
2182 # any leftover items in nv_dict are missing LVs, let's arrange the
2183 # data better
2184 for key, inst in nv_dict.iteritems():
2185 if inst.name not in res_missing:
2186 res_missing[inst.name] = []
2187 res_missing[inst.name].append(key)
2192 class LURepairDiskSizes(NoHooksLU):
2193 """Verifies the cluster disks sizes.
2196 _OP_REQP = ["instances"]
2199 def CheckArguments(self):
2200 if not isinstance(self.op.instances, list):
2201 raise errors.OpPrereqError("Invalid argument type 'instances'",
2204 def ExpandNames(self):
2205 if self.op.instances:
2206 self.wanted_names = []
2207 for name in self.op.instances:
2208 full_name = _ExpandInstanceName(self.cfg, name)
2209 self.wanted_names.append(full_name)
2210 self.needed_locks = {
2211 locking.LEVEL_NODE: [],
2212 locking.LEVEL_INSTANCE: self.wanted_names,
2214 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2216 self.wanted_names = None
2217 self.needed_locks = {
2218 locking.LEVEL_NODE: locking.ALL_SET,
2219 locking.LEVEL_INSTANCE: locking.ALL_SET,
2221 self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2223 def DeclareLocks(self, level):
2224 if level == locking.LEVEL_NODE and self.wanted_names is not None:
2225 self._LockInstancesNodes(primary_only=True)
2227 def CheckPrereq(self):
2228 """Check prerequisites.
2230 This only checks the optional instance list against the existing names.
2233 if self.wanted_names is None:
2234 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2236 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2237 in self.wanted_names]
2239 def _EnsureChildSizes(self, disk):
2240 """Ensure children of the disk have the needed disk size.
2242 This is valid mainly for DRBD8 and fixes an issue where the
2243 children have smaller disk size.
2245 @param disk: an L{ganeti.objects.Disk} object
2248 if disk.dev_type == constants.LD_DRBD8:
2249 assert disk.children, "Empty children for DRBD8?"
2250 fchild = disk.children[0]
2251 mismatch = fchild.size < disk.size
2253 self.LogInfo("Child disk has size %d, parent %d, fixing",
2254 fchild.size, disk.size)
2255 fchild.size = disk.size
2257 # and we recurse on this child only, not on the metadev
2258 return self._EnsureChildSizes(fchild) or mismatch
2262 def Exec(self, feedback_fn):
2263 """Verify the size of cluster disks.
2266 # TODO: check child disks too
2267 # TODO: check differences in size between primary/secondary nodes
2269 for instance in self.wanted_instances:
2270 pnode = instance.primary_node
2271 if pnode not in per_node_disks:
2272 per_node_disks[pnode] = []
2273 for idx, disk in enumerate(instance.disks):
2274 per_node_disks[pnode].append((instance, idx, disk))
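# (informational note) per_node_disks groups disks by primary node as
# (instance, disk index, disk object) tuples, so that the actual sizes
# can be fetched with a single blockdev_getsizes RPC per node below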
2277 for node, dskl in per_node_disks.items():
2278 newl = [v[2].Copy() for v in dskl]
2280 self.cfg.SetDiskID(dsk, node)
2281 result = self.rpc.call_blockdev_getsizes(node, newl)
2283 self.LogWarning("Failure in blockdev_getsizes call to node"
2284 " %s, ignoring", node)
2286 if len(result.data) != len(dskl):
2287 self.LogWarning("Invalid result from node %s, ignoring node results",
2290 for ((instance, idx, disk), size) in zip(dskl, result.data):
2292 self.LogWarning("Disk %d of instance %s did not return size"
2293 " information, ignoring", idx, instance.name)
2295 if not isinstance(size, (int, long)):
2296 self.LogWarning("Disk %d of instance %s did not return valid"
2297 " size information, ignoring", idx, instance.name)
2300 if size != disk.size:
2301 self.LogInfo("Disk %d of instance %s has mismatched size,"
2302 " correcting: recorded %d, actual %d", idx,
2303 instance.name, disk.size, size)
2305 self.cfg.Update(instance, feedback_fn)
2306 changed.append((instance.name, idx, size))
2307 if self._EnsureChildSizes(disk):
2308 self.cfg.Update(instance, feedback_fn)
2309 changed.append((instance.name, idx, disk.size))
2313 class LURenameCluster(LogicalUnit):
2314 """Rename the cluster.
2317 HPATH = "cluster-rename"
2318 HTYPE = constants.HTYPE_CLUSTER
2321 def BuildHooksEnv(self):
2326 "OP_TARGET": self.cfg.GetClusterName(),
2327 "NEW_NAME": self.op.name,
2329 mn = self.cfg.GetMasterNode()
2330 all_nodes = self.cfg.GetNodeList()
2331 return env, [mn], all_nodes
2333 def CheckPrereq(self):
2334 """Verify that the passed name is a valid one.
2337 hostname = utils.GetHostInfo(self.op.name)
2339 new_name = hostname.name
2340 self.ip = new_ip = hostname.ip
2341 old_name = self.cfg.GetClusterName()
2342 old_ip = self.cfg.GetMasterIP()
2343 if new_name == old_name and new_ip == old_ip:
2344 raise errors.OpPrereqError("Neither the name nor the IP address of the"
2345 " cluster has changed",
2347 if new_ip != old_ip:
2348 if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2349 raise errors.OpPrereqError("The given cluster IP address (%s) is"
2350 " reachable on the network. Aborting." %
2351 new_ip, errors.ECODE_NOTUNIQUE)
2353 self.op.name = new_name
2355 def Exec(self, feedback_fn):
2356 """Rename the cluster.
2359 clustername = self.op.name
2362 # shutdown the master IP
2363 master = self.cfg.GetMasterNode()
2364 result = self.rpc.call_node_stop_master(master, False)
2365 result.Raise("Could not disable the master role")
2368 cluster = self.cfg.GetClusterInfo()
2369 cluster.cluster_name = clustername
2370 cluster.master_ip = ip
2371 self.cfg.Update(cluster, feedback_fn)
2373 # update the known hosts file
2374 ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2375 node_list = self.cfg.GetNodeList()
2377 node_list.remove(master)
2380 result = self.rpc.call_upload_file(node_list,
2381 constants.SSH_KNOWN_HOSTS_FILE)
2382 for to_node, to_result in result.iteritems():
2383 msg = to_result.fail_msg
2385 msg = ("Copy of file %s to node %s failed: %s" %
2386 (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2387 self.proc.LogWarning(msg)
2390 result = self.rpc.call_node_start_master(master, False, False)
2391 msg = result.fail_msg
2393 self.LogWarning("Could not re-enable the master role on"
2394 " the master, please restart manually: %s", msg)
2397 def _RecursiveCheckIfLVMBased(disk):
2398 """Check if the given disk or its children are lvm-based.
2400 @type disk: L{objects.Disk}
2401 @param disk: the disk to check
2403 @return: boolean indicating whether a LD_LV dev_type was found or not
2407 for chdisk in disk.children:
2408 if _RecursiveCheckIfLVMBased(chdisk):
2410 return disk.dev_type == constants.LD_LV
2413 class LUSetClusterParams(LogicalUnit):
2414 """Change the parameters of the cluster.
2417 HPATH = "cluster-modify"
2418 HTYPE = constants.HTYPE_CLUSTER
2421 ("candidate_pool_size", None),
2424 ("remove_uids", None),
2428 def CheckArguments(self):
2432 if self.op.candidate_pool_size is not None:
2434 self.op.candidate_pool_size = int(self.op.candidate_pool_size)
2435 except (ValueError, TypeError), err:
2436 raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
2437 str(err), errors.ECODE_INVAL)
2438 if self.op.candidate_pool_size < 1:
2439 raise errors.OpPrereqError("At least one master candidate needed",
2442 _CheckBooleanOpField(self.op, "maintain_node_health")
2444 if self.op.uid_pool:
2445 uidpool.CheckUidPool(self.op.uid_pool)
2447 if self.op.add_uids:
2448 uidpool.CheckUidPool(self.op.add_uids)
2450 if self.op.remove_uids:
2451 uidpool.CheckUidPool(self.op.remove_uids)
2453 def ExpandNames(self):
2454 # FIXME: in the future maybe other cluster params won't require checking on
2455 # all nodes to be modified.
2456 self.needed_locks = {
2457 locking.LEVEL_NODE: locking.ALL_SET,
2459 self.share_locks[locking.LEVEL_NODE] = 1
2461 def BuildHooksEnv(self):
2466 "OP_TARGET": self.cfg.GetClusterName(),
2467 "NEW_VG_NAME": self.op.vg_name,
2469 mn = self.cfg.GetMasterNode()
2470 return env, [mn], [mn]
2472 def CheckPrereq(self):
2473 """Check prerequisites.
2475 This checks that the given parameters do not conflict and that
2476 the given volume group is valid.
2479 if self.op.vg_name is not None and not self.op.vg_name:
2480 instances = self.cfg.GetAllInstancesInfo().values()
2481 for inst in instances:
2482 for disk in inst.disks:
2483 if _RecursiveCheckIfLVMBased(disk):
2484 raise errors.OpPrereqError("Cannot disable lvm storage while"
2485 " lvm-based instances exist",
2488 node_list = self.acquired_locks[locking.LEVEL_NODE]
2490 # if vg_name not None, checks given volume group on all nodes
2492 vglist = self.rpc.call_vg_list(node_list)
2493 for node in node_list:
2494 msg = vglist[node].fail_msg
2496 # ignoring down node
2497 self.LogWarning("Error while gathering data on node %s"
2498 " (ignoring node): %s", node, msg)
2500 vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2502 constants.MIN_VG_SIZE)
2504 raise errors.OpPrereqError("Error on node '%s': %s" %
2505 (node, vgstatus), errors.ECODE_ENVIRON)
2507 self.cluster = cluster = self.cfg.GetClusterInfo()
2508 # validate params changes
2509 if self.op.beparams:
2510 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2511 self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2513 if self.op.nicparams:
2514 utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2515 self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2516 objects.NIC.CheckParameterSyntax(self.new_nicparams)
2519 # check all instances for consistency
2520 for instance in self.cfg.GetAllInstancesInfo().values():
2521 for nic_idx, nic in enumerate(instance.nics):
2522 params_copy = copy.deepcopy(nic.nicparams)
2523 params_filled = objects.FillDict(self.new_nicparams, params_copy)
2525 # check parameter syntax
2527 objects.NIC.CheckParameterSyntax(params_filled)
2528 except errors.ConfigurationError, err:
2529 nic_errors.append("Instance %s, nic/%d: %s" %
2530 (instance.name, nic_idx, err))
2532 # if we're moving instances to routed, check that they have an ip
2533 target_mode = params_filled[constants.NIC_MODE]
2534 if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2535 nic_errors.append("Instance %s, nic/%d: routed NIC with no IP" %
2536 (instance.name, nic_idx))
2538 raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2539 "\n".join(nic_errors))
2541 # hypervisor list/parameters
2542 self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2543 if self.op.hvparams:
2544 if not isinstance(self.op.hvparams, dict):
2545 raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
2547 for hv_name, hv_dict in self.op.hvparams.items():
2548 if hv_name not in self.new_hvparams:
2549 self.new_hvparams[hv_name] = hv_dict
2551 self.new_hvparams[hv_name].update(hv_dict)
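# hypothetical example of the merge semantics above: passing
#   hvparams={"xen-pvm": {"root_path": "/dev/xvda1"}}
# only overrides root_path for xen-pvm; other xen-pvm parameters keep
# their current cluster-level values, since update() merges per key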
2553 # os hypervisor parameters
2554 self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2556 if not isinstance(self.op.os_hvp, dict):
2557 raise errors.OpPrereqError("Invalid 'os_hvp' parameter on input",
2559 for os_name, hvs in self.op.os_hvp.items():
2560 if not isinstance(hvs, dict):
2561 raise errors.OpPrereqError(("Invalid 'os_hvp' parameter on"
2562 " input"), errors.ECODE_INVAL)
2563 if os_name not in self.new_os_hvp:
2564 self.new_os_hvp[os_name] = hvs
2566 for hv_name, hv_dict in hvs.items():
2567 if hv_name not in self.new_os_hvp[os_name]:
2568 self.new_os_hvp[os_name][hv_name] = hv_dict
2570 self.new_os_hvp[os_name][hv_name].update(hv_dict)
2573 self.new_osp = objects.FillDict(cluster.osparams, {})
2574 if self.op.osparams:
2575 if not isinstance(self.op.osparams, dict):
2576 raise errors.OpPrereqError("Invalid 'osparams' parameter on input",
2578 for os_name, osp in self.op.osparams.items():
2579 if not isinstance(osp, dict):
2580 raise errors.OpPrereqError(("Invalid 'osparams' parameter on"
2581 " input"), errors.ECODE_INVAL)
2582 if os_name not in self.new_osp:
2583 self.new_osp[os_name] = {}
2585 self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2588 if not self.new_osp[os_name]:
2589 # we removed all parameters
2590 del self.new_osp[os_name]
2592 # check the parameter validity (remote check)
2593 _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2594 os_name, self.new_osp[os_name])
2596 # changes to the hypervisor list
2597 if self.op.enabled_hypervisors is not None:
2598 self.hv_list = self.op.enabled_hypervisors
2599 if not self.hv_list:
2600 raise errors.OpPrereqError("Enabled hypervisors list must contain at"
2601 " least one member",
2603 invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
2605 raise errors.OpPrereqError("Enabled hypervisors contains invalid"
2607 utils.CommaJoin(invalid_hvs),
2609 for hv in self.hv_list:
2610 # if the hypervisor doesn't already exist in the cluster
2611 # hvparams, we initialize it to empty, and then (in both
2612 # cases) we make sure to fill the defaults, as we might not
2613 # have a complete defaults list if the hypervisor wasn't
2614 # enabled before
2615 if hv not in new_hvp:
2617 new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2618 utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2620 self.hv_list = cluster.enabled_hypervisors
2622 if self.op.hvparams or self.op.enabled_hypervisors is not None:
2623 # either the enabled list has changed, or the parameters have, validate
2624 for hv_name, hv_params in self.new_hvparams.items():
2625 if ((self.op.hvparams and hv_name in self.op.hvparams) or
2626 (self.op.enabled_hypervisors and
2627 hv_name in self.op.enabled_hypervisors)):
2628 # either this is a new hypervisor, or its parameters have changed
2629 hv_class = hypervisor.GetHypervisor(hv_name)
2630 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2631 hv_class.CheckParameterSyntax(hv_params)
2632 _CheckHVParams(self, node_list, hv_name, hv_params)
2635 # no need to check any newly-enabled hypervisors, since the
2636 # defaults have already been checked in the above code-block
2637 for os_name, os_hvp in self.new_os_hvp.items():
2638 for hv_name, hv_params in os_hvp.items():
2639 utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2640 # we need to fill in the new os_hvp on top of the actual hv_p
2641 cluster_defaults = self.new_hvparams.get(hv_name, {})
2642 new_osp = objects.FillDict(cluster_defaults, hv_params)
2643 hv_class = hypervisor.GetHypervisor(hv_name)
2644 hv_class.CheckParameterSyntax(new_osp)
2645 _CheckHVParams(self, node_list, hv_name, new_osp)
2648 def Exec(self, feedback_fn):
2649 """Change the parameters of the cluster.
2652 if self.op.vg_name is not None:
2653 new_volume = self.op.vg_name
2656 if new_volume != self.cfg.GetVGName():
2657 self.cfg.SetVGName(new_volume)
2659 feedback_fn("Cluster LVM configuration already in desired"
2660 " state, not changing")
2661 if self.op.hvparams:
2662 self.cluster.hvparams = self.new_hvparams
2664 self.cluster.os_hvp = self.new_os_hvp
2665 if self.op.enabled_hypervisors is not None:
2666 self.cluster.hvparams = self.new_hvparams
2667 self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2668 if self.op.beparams:
2669 self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2670 if self.op.nicparams:
2671 self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2672 if self.op.osparams:
2673 self.cluster.osparams = self.new_osp
2675 if self.op.candidate_pool_size is not None:
2676 self.cluster.candidate_pool_size = self.op.candidate_pool_size
2677 # we need to update the pool size here, otherwise the save will fail
2678 _AdjustCandidatePool(self, [])
2680 if self.op.maintain_node_health is not None:
2681 self.cluster.maintain_node_health = self.op.maintain_node_health
2683 if self.op.add_uids is not None:
2684 uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2686 if self.op.remove_uids is not None:
2687 uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2689 if self.op.uid_pool is not None:
2690 self.cluster.uid_pool = self.op.uid_pool
2692 self.cfg.Update(self.cluster, feedback_fn)
2695 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2696 """Distribute additional files which are part of the cluster configuration.
2698 ConfigWriter takes care of distributing the config and ssconf files, but
2699 there are more files which should be distributed to all nodes. This function
2700 makes sure those are copied.
2702 @param lu: calling logical unit
2703 @param additional_nodes: list of nodes not in the config to distribute to
2706 # 1. Gather target nodes
2707 myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2708 dist_nodes = lu.cfg.GetOnlineNodeList()
2709 if additional_nodes is not None:
2710 dist_nodes.extend(additional_nodes)
2711 if myself.name in dist_nodes:
2712 dist_nodes.remove(myself.name)
2714 # 2. Gather files to distribute
2715 dist_files = set([constants.ETC_HOSTS,
2716 constants.SSH_KNOWN_HOSTS_FILE,
2717 constants.RAPI_CERT_FILE,
2718 constants.RAPI_USERS_FILE,
2719 constants.CONFD_HMAC_KEY,
2720 constants.CLUSTER_DOMAIN_SECRET_FILE,
2723 enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2724 for hv_name in enabled_hypervisors:
2725 hv_class = hypervisor.GetHypervisor(hv_name)
2726 dist_files.update(hv_class.GetAncillaryFiles())
2728 # 3. Perform the files upload
2729 for fname in dist_files:
2730 if os.path.exists(fname):
2731 result = lu.rpc.call_upload_file(dist_nodes, fname)
2732 for to_node, to_result in result.items():
2733 msg = to_result.fail_msg
2735 msg = ("Copy of file %s to node %s failed: %s" %
2736 (fname, to_node, msg))
2737 lu.proc.LogWarning(msg)
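# (informational note) this helper is called from several LUs in this
# module (e.g. LURedistributeConfig.Exec below, and the node add/remove
# LUs) to keep /etc/hosts, known_hosts, certificates and hypervisor
# ancillary files in sync across all online nodes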
2740 class LURedistributeConfig(NoHooksLU):
2741 """Force the redistribution of cluster configuration.
2743 This is a very simple LU.
2749 def ExpandNames(self):
2750 self.needed_locks = {
2751 locking.LEVEL_NODE: locking.ALL_SET,
2753 self.share_locks[locking.LEVEL_NODE] = 1
2755 def CheckPrereq(self):
2756 """Check prerequisites.
2760 def Exec(self, feedback_fn):
2761 """Redistribute the configuration.
2764 self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2765 _RedistributeAncillaryFiles(self)
2768 def _WaitForSync(lu, instance, disks=None, oneshot=False):
2769 """Sleep and poll for an instance's disk to sync.
2772 if not instance.disks or disks is not None and not disks:
2775 disks = _ExpandCheckDisks(instance, disks)
2778 lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2780 node = instance.primary_node
2783 lu.cfg.SetDiskID(dev, node)
2785 # TODO: Convert to utils.Retry
2788 degr_retries = 10 # in seconds, as we sleep 1 second each time
2792 cumul_degraded = False
2793 rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
2794 msg = rstats.fail_msg
2796 lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2799 raise errors.RemoteError("Can't contact node %s for mirror data,"
2800 " aborting." % node)
2803 rstats = rstats.payload
2805 for i, mstat in enumerate(rstats):
2807 lu.LogWarning("Can't compute data for node %s/%s",
2808 node, disks[i].iv_name)
2811 cumul_degraded = (cumul_degraded or
2812 (mstat.is_degraded and mstat.sync_percent is None))
2813 if mstat.sync_percent is not None:
2815 if mstat.estimated_time is not None:
2816 rem_time = ("%s remaining (estimated)" %
2817 utils.FormatSeconds(mstat.estimated_time))
2818 max_time = mstat.estimated_time
2820 rem_time = "no time estimate"
2821 lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2822 (disks[i].iv_name, mstat.sync_percent, rem_time))
2824 # if we're done but degraded, let's do a few small retries, to
2825 # make sure we see a stable and not transient situation; therefore
2826 # we force restart of the loop
2827 if (done or oneshot) and cumul_degraded and degr_retries > 0:
2828 logging.info("Degraded disks found, %d retries left", degr_retries)
2836 time.sleep(min(60, max_time))
2839 lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2840 return not cumul_degraded
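# hypothetical usage sketch (callers elsewhere in this module follow a
# similar pattern after creating or activating disks):
#   if not _WaitForSync(self, instance):
#     raise errors.OpExecError("Disks are degraded after sync")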
2843 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2844 """Check that mirrors are not degraded.
2846 The ldisk parameter, if True, will change the test from the
2847 is_degraded attribute (which represents overall non-ok status for
2848 the device(s)) to the ldisk (representing the local storage status).
2851 lu.cfg.SetDiskID(dev, node)
2855 if on_primary or dev.AssembleOnSecondary():
2856 rstats = lu.rpc.call_blockdev_find(node, dev)
2857 msg = rstats.fail_msg
2859 lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2861 elif not rstats.payload:
2862 lu.LogWarning("Can't find disk on node %s", node)
2866 result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2868 result = result and not rstats.payload.is_degraded
2871 for child in dev.children:
2872 result = result and _CheckDiskConsistency(lu, child, node, on_primary)
2877 class LUDiagnoseOS(NoHooksLU):
2878 """Logical unit for OS diagnose/query.
2881 _OP_REQP = ["output_fields", "names"]
2883 _FIELDS_STATIC = utils.FieldSet()
2884 _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants",
2885 "parameters", "api_versions")
2887 def CheckArguments(self):
2889 raise errors.OpPrereqError("Selective OS query not supported",
2892 _CheckOutputFields(static=self._FIELDS_STATIC,
2893 dynamic=self._FIELDS_DYNAMIC,
2894 selected=self.op.output_fields)
2896 def ExpandNames(self):
2897 # Lock all nodes, in shared mode
2898 # Temporary removal of locks, should be reverted later
2899 # TODO: reintroduce locks when they are lighter-weight
2900 self.needed_locks = {}
2901 #self.share_locks[locking.LEVEL_NODE] = 1
2902 #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2904 def CheckPrereq(self):
2905 """Check prerequisites.
2910 def _DiagnoseByOS(rlist):
2911 """Remaps a per-node return list into a per-os per-node dictionary
2913 @param rlist: a map with node names as keys and OS objects as values
2916 @return: a dictionary with osnames as keys and as value another
2917 map, with nodes as keys and tuples of (path, status, diagnose,
2918 variants, parameters, api_versions) as values, eg::
2920 {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
2921 (/srv/..., False, "invalid api")],
2922 "node2": [(/srv/..., True, "", [], [])]}
2927 # we build here the list of nodes that didn't fail the RPC (at RPC
2928 # level), so that nodes with a non-responding node daemon don't
2929 # make all OSes invalid
2930 good_nodes = [node_name for node_name in rlist
2931 if not rlist[node_name].fail_msg]
2932 for node_name, nr in rlist.items():
2933 if nr.fail_msg or not nr.payload:
2935 for (name, path, status, diagnose, variants,
2936 params, api_versions) in nr.payload:
2937 if name not in all_os:
2938 # build a list of nodes for this os containing empty lists
2939 # for each node in node_list
2941 for nname in good_nodes:
2942 all_os[name][nname] = []
2943 # convert params from [name, help] to (name, help)
2944 params = [tuple(v) for v in params]
2945 all_os[name][node_name].append((path, status, diagnose,
2946 variants, params, api_versions))
2949 def Exec(self, feedback_fn):
2950 """Compute the list of OSes.
2953 valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
2954 node_data = self.rpc.call_os_diagnose(valid_nodes)
2955 pol = self._DiagnoseByOS(node_data)
2958 for os_name, os_data in pol.items():
2961 (variants, params, api_versions) = null_state = (set(), set(), set())
2962 for idx, osl in enumerate(os_data.values()):
2963 valid = bool(valid and osl and osl[0][1])
2965 (variants, params, api_versions) = null_state
2967 node_variants, node_params, node_api = osl[0][3:6]
2968 if idx == 0: # first entry
2969 variants = set(node_variants)
2970 params = set(node_params)
2971 api_versions = set(node_api)
2972 else: # keep consistency
2973 variants.intersection_update(node_variants)
2974 params.intersection_update(node_params)
2975 api_versions.intersection_update(node_api)
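# (informational note) after this loop, variants/params/api_versions
# hold only the values common to every node reporting this OS, so a
# feature is advertised only if all nodes support it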
2977 for field in self.op.output_fields:
2980 elif field == "valid":
2982 elif field == "node_status":
2983 # this is just a copy of the dict
2985 for node_name, nos_list in os_data.items():
2986 val[node_name] = nos_list
2987 elif field == "variants":
2988 val = list(variants)
2989 elif field == "parameters":
2991 elif field == "api_versions":
2992 val = list(api_versions)
2994 raise errors.ParameterError(field)
3001 class LURemoveNode(LogicalUnit):
3002 """Logical unit for removing a node.
3005 HPATH = "node-remove"
3006 HTYPE = constants.HTYPE_NODE
3007 _OP_REQP = ["node_name"]
3009 def BuildHooksEnv(self):
3012 This doesn't run on the target node in the pre phase as a failed
3013 node would then be impossible to remove.
3017 "OP_TARGET": self.op.node_name,
3018 "NODE_NAME": self.op.node_name,
3020 all_nodes = self.cfg.GetNodeList()
3022 all_nodes.remove(self.op.node_name)
3024 logging.warning("Node %s which is about to be removed not found"
3025 " in the all nodes list", self.op.node_name)
3026 return env, all_nodes, all_nodes
3028 def CheckPrereq(self):
3029 """Check prerequisites.
3032 - the node exists in the configuration
3033 - it does not have primary or secondary instances
3034 - it's not the master
3036 Any errors are signaled by raising errors.OpPrereqError.
3039 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3040 node = self.cfg.GetNodeInfo(self.op.node_name)
3041 assert node is not None
3043 instance_list = self.cfg.GetInstanceList()
3045 masternode = self.cfg.GetMasterNode()
3046 if node.name == masternode:
3047 raise errors.OpPrereqError("Node is the master node,"
3048 " you need to failover first.",
3051 for instance_name in instance_list:
3052 instance = self.cfg.GetInstanceInfo(instance_name)
3053 if node.name in instance.all_nodes:
3054 raise errors.OpPrereqError("Instance %s is still running on the node,"
3055 " please remove it first." % instance_name,
3057 self.op.node_name = node.name
3060 def Exec(self, feedback_fn):
3061 """Removes the node from the cluster.
3065 logging.info("Stopping the node daemon and removing configs from node %s",
3068 modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3070 # Promote nodes to master candidate as needed
3071 _AdjustCandidatePool(self, exceptions=[node.name])
3072 self.context.RemoveNode(node.name)
3074 # Run post hooks on the node before it's removed
3075 hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
3077 hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
3079 # pylint: disable-msg=W0702
3080 self.LogWarning("Errors occurred running hooks on %s" % node.name)
3082 result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3083 msg = result.fail_msg
3085 self.LogWarning("Errors encountered on the remote node while leaving"
3086 " the cluster: %s", msg)
3088 # Remove node from our /etc/hosts
3089 if self.cfg.GetClusterInfo().modify_etc_hosts:
3090 # FIXME: this should be done via an rpc call to node daemon
3091 utils.RemoveHostFromEtcHosts(node.name)
3092 _RedistributeAncillaryFiles(self)
3095 class LUQueryNodes(NoHooksLU):
3096 """Logical unit for querying nodes.
3099 # pylint: disable-msg=W0142
3100 _OP_REQP = ["output_fields", "names", "use_locking"]
3103 _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
3104 "master_candidate", "offline", "drained"]
3106 _FIELDS_DYNAMIC = utils.FieldSet(
3108 "mtotal", "mnode", "mfree",
3110 "ctotal", "cnodes", "csockets",
3113 _FIELDS_STATIC = utils.FieldSet(*[
3114 "pinst_cnt", "sinst_cnt",
3115 "pinst_list", "sinst_list",
3116 "pip", "sip", "tags",
3118 "role"] + _SIMPLE_FIELDS
3121 def CheckArguments(self):
3122 _CheckOutputFields(static=self._FIELDS_STATIC,
3123 dynamic=self._FIELDS_DYNAMIC,
3124 selected=self.op.output_fields)
3126 def ExpandNames(self):
3127 self.needed_locks = {}
3128 self.share_locks[locking.LEVEL_NODE] = 1
3131 self.wanted = _GetWantedNodes(self, self.op.names)
3133 self.wanted = locking.ALL_SET
3135 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
3136 self.do_locking = self.do_node_query and self.op.use_locking
3138 # if we don't request only static fields, we need to lock the nodes
3139 self.needed_locks[locking.LEVEL_NODE] = self.wanted
3141 def CheckPrereq(self):
3142 """Check prerequisites.
3145 # The validation of the node list is done in the _GetWantedNodes,
3146 # if non-empty; if empty, there's no validation to do
3149 def Exec(self, feedback_fn):
3150 """Computes the list of nodes and their attributes.
3153 all_info = self.cfg.GetAllNodesInfo()
3155 nodenames = self.acquired_locks[locking.LEVEL_NODE]
3156 elif self.wanted != locking.ALL_SET:
3157 nodenames = self.wanted
3158 missing = set(nodenames).difference(all_info.keys())
3160 raise errors.OpExecError(
3161 "Some nodes were removed before retrieving their data: %s" % missing)
3163 nodenames = all_info.keys()
3165 nodenames = utils.NiceSort(nodenames)
3166 nodelist = [all_info[name] for name in nodenames]
3168 # begin data gathering
3170 if self.do_node_query:
3172 node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
3173 self.cfg.GetHypervisorType())
3174 for name in nodenames:
3175 nodeinfo = node_data[name]
3176 if not nodeinfo.fail_msg and nodeinfo.payload:
3177 nodeinfo = nodeinfo.payload
3178 fn = utils.TryConvert
3180 "mtotal": fn(int, nodeinfo.get('memory_total', None)),
3181 "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
3182 "mfree": fn(int, nodeinfo.get('memory_free', None)),
3183 "dtotal": fn(int, nodeinfo.get('vg_size', None)),
3184 "dfree": fn(int, nodeinfo.get('vg_free', None)),
3185 "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
3186 "bootid": nodeinfo.get('bootid', None),
3187 "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
3188 "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
3191 live_data[name] = {}
3193 live_data = dict.fromkeys(nodenames, {})
3195 node_to_primary = dict([(name, set()) for name in nodenames])
3196 node_to_secondary = dict([(name, set()) for name in nodenames])
3198 inst_fields = frozenset(("pinst_cnt", "pinst_list",
3199 "sinst_cnt", "sinst_list"))
3200 if inst_fields & frozenset(self.op.output_fields):
3201 inst_data = self.cfg.GetAllInstancesInfo()
3203 for inst in inst_data.values():
3204 if inst.primary_node in node_to_primary:
3205 node_to_primary[inst.primary_node].add(inst.name)
3206 for secnode in inst.secondary_nodes:
3207 if secnode in node_to_secondary:
3208 node_to_secondary[secnode].add(inst.name)
3210 master_node = self.cfg.GetMasterNode()
3212 # end data gathering
3215 for node in nodelist:
3217 for field in self.op.output_fields:
3218 if field in self._SIMPLE_FIELDS:
3219 val = getattr(node, field)
3220 elif field == "pinst_list":
3221 val = list(node_to_primary[node.name])
3222 elif field == "sinst_list":
3223 val = list(node_to_secondary[node.name])
3224 elif field == "pinst_cnt":
3225 val = len(node_to_primary[node.name])
3226 elif field == "sinst_cnt":
3227 val = len(node_to_secondary[node.name])
3228 elif field == "pip":
3229 val = node.primary_ip
3230 elif field == "sip":
3231 val = node.secondary_ip
3232 elif field == "tags":
3233 val = list(node.GetTags())
3234 elif field == "master":
3235 val = node.name == master_node
3236 elif self._FIELDS_DYNAMIC.Matches(field):
3237 val = live_data[node.name].get(field, None)
3238 elif field == "role":
3239 if node.name == master_node:
3241 elif node.master_candidate:
3250 raise errors.ParameterError(field)
3251 node_output.append(val)
3252 output.append(node_output)
3257 class LUQueryNodeVolumes(NoHooksLU):
3258 """Logical unit for getting volumes on node(s).
3261 _OP_REQP = ["nodes", "output_fields"]
3263 _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3264 _FIELDS_STATIC = utils.FieldSet("node")
3266 def CheckArguments(self):
3267 _CheckOutputFields(static=self._FIELDS_STATIC,
3268 dynamic=self._FIELDS_DYNAMIC,
3269 selected=self.op.output_fields)
3271 def ExpandNames(self):
3272 self.needed_locks = {}
3273 self.share_locks[locking.LEVEL_NODE] = 1
3274 if not self.op.nodes:
3275 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3277 self.needed_locks[locking.LEVEL_NODE] = \
3278 _GetWantedNodes(self, self.op.nodes)
3280 def CheckPrereq(self):
3281 """Check prerequisites.
3283 This checks that the fields required are valid output fields.
3286 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3288 def Exec(self, feedback_fn):
3289 """Computes the list of nodes and their attributes.
3292 nodenames = self.nodes
3293 volumes = self.rpc.call_node_volumes(nodenames)
3295 ilist = [self.cfg.GetInstanceInfo(iname) for iname
3296 in self.cfg.GetInstanceList()]
3298 lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3301 for node in nodenames:
3302 nresult = volumes[node]
3305 msg = nresult.fail_msg
3307 self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3310 node_vols = nresult.payload[:]
3311 node_vols.sort(key=lambda vol: vol['dev'])
3313 for vol in node_vols:
3315 for field in self.op.output_fields:
3318 elif field == "phys":
3322 elif field == "name":
3324 elif field == "size":
3325 val = int(float(vol['size']))
3326 elif field == "instance":
3328 if node not in lv_by_node[inst]:
3330 if vol['name'] in lv_by_node[inst][node]:
3336 raise errors.ParameterError(field)
3337 node_output.append(str(val))
3339 output.append(node_output)
3344 class LUQueryNodeStorage(NoHooksLU):
3345 """Logical unit for getting information on storage units on node(s).
3348 _OP_REQP = ["nodes", "storage_type", "output_fields"]
3349 _OP_DEFS = [("name", None)]
3351 _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3353 def CheckArguments(self):
3354 _CheckStorageType(self.op.storage_type)
3356 _CheckOutputFields(static=self._FIELDS_STATIC,
3357 dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3358 selected=self.op.output_fields)
3360 def ExpandNames(self):
3361 self.needed_locks = {}
3362 self.share_locks[locking.LEVEL_NODE] = 1
3365 self.needed_locks[locking.LEVEL_NODE] = \
3366 _GetWantedNodes(self, self.op.nodes)
3368 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3370 def CheckPrereq(self):
3371 """Check prerequisites.
3373 This checks that the fields required are valid output fields.
3376 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3378 def Exec(self, feedback_fn):
3379 """Computes the list of nodes and their attributes.
3382 # Always get name to sort by
3383 if constants.SF_NAME in self.op.output_fields:
3384 fields = self.op.output_fields[:]
3386 fields = [constants.SF_NAME] + self.op.output_fields
3388 # Never ask for node or type as it's only known to the LU
3389 for extra in [constants.SF_NODE, constants.SF_TYPE]:
3390 while extra in fields:
3391 fields.remove(extra)
3393 field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3394 name_idx = field_idx[constants.SF_NAME]
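# illustrative example (field names assumed): with
# fields == ["name", "size", "used"], field_idx becomes
# {"name": 0, "size": 1, "used": 2} and name_idx == 0, letting the rows
# returned by each node be keyed by their name column below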
3396 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3397 data = self.rpc.call_storage_list(self.nodes,
3398 self.op.storage_type, st_args,
3399 self.op.name, fields)
3403 for node in utils.NiceSort(self.nodes):
3404 nresult = data[node]
3408 msg = nresult.fail_msg
3410 self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3413 rows = dict([(row[name_idx], row) for row in nresult.payload])
3415 for name in utils.NiceSort(rows.keys()):
3420 for field in self.op.output_fields:
3421 if field == constants.SF_NODE:
3423 elif field == constants.SF_TYPE:
3424 val = self.op.storage_type
3425 elif field in field_idx:
3426 val = row[field_idx[field]]
3428 raise errors.ParameterError(field)
3437 class LUModifyNodeStorage(NoHooksLU):
3438 """Logical unit for modifying a storage volume on a node.
3441 _OP_REQP = ["node_name", "storage_type", "name", "changes"]
3444 def CheckArguments(self):
3445 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3447 _CheckStorageType(self.op.storage_type)
3449 def ExpandNames(self):
3450 self.needed_locks = {
3451 locking.LEVEL_NODE: self.op.node_name,
3454 def CheckPrereq(self):
3455 """Check prerequisites.
3458 storage_type = self.op.storage_type
3461 modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3463 raise errors.OpPrereqError("Storage units of type '%s' can not be"
3464 " modified" % storage_type,
3467 diff = set(self.op.changes.keys()) - modifiable
3469 raise errors.OpPrereqError("The following fields can not be modified for"
3470 " storage units of type '%s': %r" %
3471 (storage_type, list(diff)),
3474 def Exec(self, feedback_fn):
3475 """Computes the list of nodes and their attributes.
3478 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3479 result = self.rpc.call_storage_modify(self.op.node_name,
3480 self.op.storage_type, st_args,
3481 self.op.name, self.op.changes)
3482 result.Raise("Failed to modify storage unit '%s' on %s" %
3483 (self.op.name, self.op.node_name))
3486 class LUAddNode(LogicalUnit):
3487 """Logical unit for adding node to the cluster.
3491 HTYPE = constants.HTYPE_NODE
3492 _OP_REQP = ["node_name"]
3493 _OP_DEFS = [("secondary_ip", None)]
3495 def CheckArguments(self):
3496 # validate/normalize the node name
3497 self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)
3499 def BuildHooksEnv(self):
3502 This will run on all nodes before, and on all nodes + the new node after.
3506 "OP_TARGET": self.op.node_name,
3507 "NODE_NAME": self.op.node_name,
3508 "NODE_PIP": self.op.primary_ip,
3509 "NODE_SIP": self.op.secondary_ip,
3511 nodes_0 = self.cfg.GetNodeList()
3512 nodes_1 = nodes_0 + [self.op.node_name, ]
3513 return env, nodes_0, nodes_1
3515 def CheckPrereq(self):
3516 """Check prerequisites.
3519 - the new node is not already in the config
3521 - its parameters (single/dual homed) match the cluster
3523 Any errors are signaled by raising errors.OpPrereqError.
3526 node_name = self.op.node_name
3529 dns_data = utils.GetHostInfo(node_name)
3531 node = dns_data.name
3532 primary_ip = self.op.primary_ip = dns_data.ip
3533 if self.op.secondary_ip is None:
3534 self.op.secondary_ip = primary_ip
3535 if not utils.IsValidIP(self.op.secondary_ip):
3536 raise errors.OpPrereqError("Invalid secondary IP given",
3538 secondary_ip = self.op.secondary_ip
3540 node_list = cfg.GetNodeList()
3541 if not self.op.readd and node in node_list:
3542 raise errors.OpPrereqError("Node %s is already in the configuration" %
3543 node, errors.ECODE_EXISTS)
3544 elif self.op.readd and node not in node_list:
3545 raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3548 self.changed_primary_ip = False
3550 for existing_node_name in node_list:
3551 existing_node = cfg.GetNodeInfo(existing_node_name)
3553 if self.op.readd and node == existing_node_name:
3554 if existing_node.secondary_ip != secondary_ip:
3555 raise errors.OpPrereqError("Readded node doesn't have the same IP"
3556 " address configuration as before",
3558 if existing_node.primary_ip != primary_ip:
3559 self.changed_primary_ip = True
3563 if (existing_node.primary_ip == primary_ip or
3564 existing_node.secondary_ip == primary_ip or
3565 existing_node.primary_ip == secondary_ip or
3566 existing_node.secondary_ip == secondary_ip):
3567 raise errors.OpPrereqError("New node ip address(es) conflict with"
3568 " existing node %s" % existing_node.name,
3569 errors.ECODE_NOTUNIQUE)
3571 # check that the type of the node (single versus dual homed) is the
3572 # same as for the master
3573 myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3574 master_singlehomed = myself.secondary_ip == myself.primary_ip
3575 newbie_singlehomed = secondary_ip == primary_ip
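# (informational note) "single homed" here means primary_ip ==
# secondary_ip; a node can only be added if its homing setup matches the
# master's, e.g. a dual-homed node cannot join a single-homed cluster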
3576 if master_singlehomed != newbie_singlehomed:
3577 if master_singlehomed:
3578 raise errors.OpPrereqError("The master has no private ip but the"
3579 " new node has one",
3582 raise errors.OpPrereqError("The master has a private ip but the"
3583 " new node doesn't have one",
3586 # checks reachability
3587 if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3588 raise errors.OpPrereqError("Node not reachable by ping",
3589 errors.ECODE_ENVIRON)
3591 if not newbie_singlehomed:
3592 # check reachability from my secondary ip to newbie's secondary ip
3593 if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3594 source=myself.secondary_ip):
3595 raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3596 " based ping to noded port",
3597 errors.ECODE_ENVIRON)
3604 self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3607 self.new_node = self.cfg.GetNodeInfo(node)
3608 assert self.new_node is not None, "Can't retrieve locked node %s" % node
3610 self.new_node = objects.Node(name=node,
3611 primary_ip=primary_ip,
3612 secondary_ip=secondary_ip,
3613 master_candidate=self.master_candidate,
3614 offline=False, drained=False)
3616 def Exec(self, feedback_fn):
3617 """Adds the new node to the cluster.
3620 new_node = self.new_node
3621 node = new_node.name
3623 # for re-adds, reset the offline/drained/master-candidate flags;
3624 # we need to reset here, otherwise offline would prevent RPC calls
3625 # later in the procedure; this also means that if the re-add
3626 # fails, we are left with a non-offlined, broken node
3628 new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3629 self.LogInfo("Readding a node, the offline/drained flags were reset")
3630 # if we demote the node, we do cleanup later in the procedure
3631 new_node.master_candidate = self.master_candidate
3632 if self.changed_primary_ip:
3633 new_node.primary_ip = self.op.primary_ip
3635 # notify the user about any possible mc promotion
3636 if new_node.master_candidate:
3637 self.LogInfo("Node will be a master candidate")
3639 # check connectivity
3640 result = self.rpc.call_version([node])[node]
3641 result.Raise("Can't get version information from node %s" % node)
3642 if constants.PROTOCOL_VERSION == result.payload:
3643 logging.info("Communication to node %s fine, sw version %s match",
3644 node, result.payload)
3646 raise errors.OpExecError("Version mismatch master version %s,"
3647 " node version %s" %
3648 (constants.PROTOCOL_VERSION, result.payload))
3651 if self.cfg.GetClusterInfo().modify_ssh_setup:
3652 logging.info("Copy ssh key to node %s", node)
3653 priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3655 keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3656 constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3660 keyarray.append(utils.ReadFile(i))
3662 result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3663 keyarray[2], keyarray[3], keyarray[4],
3665 result.Raise("Cannot transfer ssh keys to the new node")
3667 # Add node to our /etc/hosts, and add key to known_hosts
3668 if self.cfg.GetClusterInfo().modify_etc_hosts:
3669 # FIXME: this should be done via an rpc call to node daemon
3670 utils.AddHostToEtcHosts(new_node.name)
3672 if new_node.secondary_ip != new_node.primary_ip:
3673 result = self.rpc.call_node_has_ip_address(new_node.name,
3674 new_node.secondary_ip)
3675 result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3676 prereq=True, ecode=errors.ECODE_ENVIRON)
3677 if not result.payload:
3678 raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3679 " you gave (%s). Please fix and re-run this"
3680 " command." % new_node.secondary_ip)
3682 node_verify_list = [self.cfg.GetMasterNode()]
3683 node_verify_param = {
3684 constants.NV_NODELIST: [node],
3685 # TODO: do a node-net-test as well?
3688 result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3689 self.cfg.GetClusterName())
3690 for verifier in node_verify_list:
3691 result[verifier].Raise("Cannot communicate with node %s" % verifier)
3692 nl_payload = result[verifier].payload[constants.NV_NODELIST]
3694 for failed in nl_payload:
3695 feedback_fn("ssh/hostname verification failed"
3696 " (checking from %s): %s" %
3697 (verifier, nl_payload[failed]))
3698 raise errors.OpExecError("ssh/hostname verification failed.")
3701 _RedistributeAncillaryFiles(self)
3702 self.context.ReaddNode(new_node)
3703 # make sure we redistribute the config
3704 self.cfg.Update(new_node, feedback_fn)
3705 # and make sure the new node will not have old files around
3706 if not new_node.master_candidate:
3707 result = self.rpc.call_node_demote_from_mc(new_node.name)
3708 msg = result.fail_msg
3710 self.LogWarning("Node failed to demote itself from master"
3711 " candidate status: %s" % msg)
3713 _RedistributeAncillaryFiles(self, additional_nodes=[node])
3714 self.context.AddNode(new_node, self.proc.GetECId())
3717 class LUSetNodeParams(LogicalUnit):
3718 """Modifies the parameters of a node.
3721 HPATH = "node-modify"
3722 HTYPE = constants.HTYPE_NODE
3723 _OP_REQP = ["node_name"]
3726 def CheckArguments(self):
3727 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3728 _CheckBooleanOpField(self.op, 'master_candidate')
3729 _CheckBooleanOpField(self.op, 'offline')
3730 _CheckBooleanOpField(self.op, 'drained')
3731 _CheckBooleanOpField(self.op, 'auto_promote')
3732 all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3733 if all_mods.count(None) == 3:
3734 raise errors.OpPrereqError("Please pass at least one modification",
3736 if all_mods.count(True) > 1:
3737 raise errors.OpPrereqError("Can't set the node into more than one"
3738 " state at the same time",
3741 # Boolean value that tells us whether we're offlining or draining the node
3742 self.offline_or_drain = (self.op.offline == True or
3743 self.op.drained == True)
3744 self.deoffline_or_drain = (self.op.offline == False or
3745 self.op.drained == False)
3746 self.might_demote = (self.op.master_candidate == False or
3747 self.offline_or_drain)
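# (informational note) offlining or draining a node may implicitly
# demote it from master candidate, hence might_demote; lock_all below is
# then needed so the candidate pool can be adjusted across all nodes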
3749 self.lock_all = self.op.auto_promote and self.might_demote
3752 def ExpandNames(self):
3754 self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3756 self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3758 def BuildHooksEnv(self):
3761 This runs on the master node.
3765 "OP_TARGET": self.op.node_name,
3766 "MASTER_CANDIDATE": str(self.op.master_candidate),
3767 "OFFLINE": str(self.op.offline),
3768 "DRAINED": str(self.op.drained),
3770 nl = [self.cfg.GetMasterNode(),
3774 def CheckPrereq(self):
3775 """Check prerequisites.
3777 This only checks the instance list against the existing names.
3780 node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3782 if (self.op.master_candidate is not None or
3783 self.op.drained is not None or
3784 self.op.offline is not None):
3785 # we can't change the master's node flags
3786 if self.op.node_name == self.cfg.GetMasterNode():
3787 raise errors.OpPrereqError("The master role can be changed"
3788 " only via masterfailover",
3792 if node.master_candidate and self.might_demote and not self.lock_all:
3793 assert not self.op.auto_promote, "auto-promote set but lock_all not"
3794 # check if after removing the current node, we're missing master
3796 (mc_remaining, mc_should, _) = \
3797 self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3798 if mc_remaining < mc_should:
3799 raise errors.OpPrereqError("Not enough master candidates, please"
3800 " pass auto_promote to allow promotion",
3803 if (self.op.master_candidate == True and
3804 ((node.offline and not self.op.offline == False) or
3805 (node.drained and not self.op.drained == False))):
3806 raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3807 " to master_candidate" % node.name,
3810 # If we're being deofflined/drained, we'll MC ourself if needed
3811 if (self.deoffline_or_drain and not self.offline_or_drain and not
3812 self.op.master_candidate == True and not node.master_candidate):
3813 self.op.master_candidate = _DecideSelfPromotion(self)
3814 if self.op.master_candidate:
3815 self.LogInfo("Autopromoting node to master candidate")
3819 def Exec(self, feedback_fn):
3828 if self.op.offline is not None:
3829 node.offline = self.op.offline
3830 result.append(("offline", str(self.op.offline)))
3831 if self.op.offline == True:
3832 if node.master_candidate:
3833 node.master_candidate = False
3835 result.append(("master_candidate", "auto-demotion due to offline"))
3837 node.drained = False
3838 result.append(("drained", "clear drained status due to offline"))
3840 if self.op.master_candidate is not None:
3841 node.master_candidate = self.op.master_candidate
3843 result.append(("master_candidate", str(self.op.master_candidate)))
3844 if self.op.master_candidate == False:
3845 rrc = self.rpc.call_node_demote_from_mc(node.name)
3848 self.LogWarning("Node failed to demote itself: %s" % msg)
3850 if self.op.drained is not None:
3851 node.drained = self.op.drained
3852 result.append(("drained", str(self.op.drained)))
3853 if self.op.drained == True:
3854 if node.master_candidate:
3855 node.master_candidate = False
3857 result.append(("master_candidate", "auto-demotion due to drain"))
3858 rrc = self.rpc.call_node_demote_from_mc(node.name)
3861 self.LogWarning("Node failed to demote itself: %s" % msg)
3863 node.offline = False
3864 result.append(("offline", "clear offline status due to drain"))
3866 # we locked all nodes, so we adjust the candidate pool before updating this node
3868 _AdjustCandidatePool(self, [node.name])
3870 # this will trigger configuration file update, if needed
3871 self.cfg.Update(node, feedback_fn)
3873 # this will trigger job queue propagation or cleanup
3875 self.context.ReaddNode(node)
3880 class LUPowercycleNode(NoHooksLU):
3881 """Powercycles a node.
3884 _OP_REQP = ["node_name", "force"]
3887 def CheckArguments(self):
3888 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3889 if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
3890 raise errors.OpPrereqError("The node is the master and the force"
3891 " parameter was not set",
3894 def ExpandNames(self):
3895 """Locking for PowercycleNode.
3897 This is a last-resort option and shouldn't block on other
3898 jobs. Therefore, we grab no locks.
3901 self.needed_locks = {}
3903 def CheckPrereq(self):
3904 """Check prerequisites.
3906 This LU has no prereqs.
3911 def Exec(self, feedback_fn):
3915 result = self.rpc.call_node_powercycle(self.op.node_name,
3916 self.cfg.GetHypervisorType())
3917 result.Raise("Failed to schedule the reboot")
3918 return result.payload
3921 class LUQueryClusterInfo(NoHooksLU):
3922 """Query cluster configuration.
3928 def ExpandNames(self):
3929 self.needed_locks = {}
3931 def CheckPrereq(self):
3932 """No prerequisites needed for this LU.
3937 def Exec(self, feedback_fn):
3938 """Return cluster config.
3941 cluster = self.cfg.GetClusterInfo()
3944 # Filter just for enabled hypervisors
3945 for os_name, hv_dict in cluster.os_hvp.items():
3946 os_hvp[os_name] = {}
3947 for hv_name, hv_params in hv_dict.items():
3948 if hv_name in cluster.enabled_hypervisors:
3949 os_hvp[os_name][hv_name] = hv_params
3952 "software_version": constants.RELEASE_VERSION,
3953 "protocol_version": constants.PROTOCOL_VERSION,
3954 "config_version": constants.CONFIG_VERSION,
3955 "os_api_version": max(constants.OS_API_VERSIONS),
3956 "export_version": constants.EXPORT_VERSION,
3957 "architecture": (platform.architecture()[0], platform.machine()),
3958 "name": cluster.cluster_name,
3959 "master": cluster.master_node,
3960 "default_hypervisor": cluster.enabled_hypervisors[0],
3961 "enabled_hypervisors": cluster.enabled_hypervisors,
3962 "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
3963 for hypervisor_name in cluster.enabled_hypervisors]),
3965 "beparams": cluster.beparams,
3966 "osparams": cluster.osparams,
3967 "nicparams": cluster.nicparams,
3968 "candidate_pool_size": cluster.candidate_pool_size,
3969 "master_netdev": cluster.master_netdev,
3970 "volume_group_name": cluster.volume_group_name,
3971 "file_storage_dir": cluster.file_storage_dir,
3972 "maintain_node_health": cluster.maintain_node_health,
3973 "ctime": cluster.ctime,
3974 "mtime": cluster.mtime,
3975 "uuid": cluster.uuid,
3976 "tags": list(cluster.GetTags()),
3977 "uid_pool": cluster.uid_pool,
3983 class LUQueryConfigValues(NoHooksLU):
3984 """Return configuration values.
3989 _FIELDS_DYNAMIC = utils.FieldSet()
3990 _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
3993 def CheckArguments(self):
3994 _CheckOutputFields(static=self._FIELDS_STATIC,
3995 dynamic=self._FIELDS_DYNAMIC,
3996 selected=self.op.output_fields)
3998 def ExpandNames(self):
3999 self.needed_locks = {}
4001 def CheckPrereq(self):
4002 """No prerequisites.
4007 def Exec(self, feedback_fn):
4008 """Dump a representation of the cluster config to the standard output.
4012 for field in self.op.output_fields:
4013 if field == "cluster_name":
4014 entry = self.cfg.GetClusterName()
4015 elif field == "master_node":
4016 entry = self.cfg.GetMasterNode()
4017 elif field == "drain_flag":
4018 entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4019 elif field == "watcher_pause":
4020 entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4022 raise errors.ParameterError(field)
4023 values.append(entry)
4027 class LUActivateInstanceDisks(NoHooksLU):
4028 """Bring up an instance's disks.
4031 _OP_REQP = ["instance_name"]
4032 _OP_DEFS = [("ignore_size", False)]
4035 def ExpandNames(self):
4036 self._ExpandAndLockInstance()
4037 self.needed_locks[locking.LEVEL_NODE] = []
4038 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4040 def DeclareLocks(self, level):
4041 if level == locking.LEVEL_NODE:
4042 self._LockInstancesNodes()
4044 def CheckPrereq(self):
4045 """Check prerequisites.
4047 This checks that the instance is in the cluster.
4050 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4051 assert self.instance is not None, \
4052 "Cannot retrieve locked instance %s" % self.op.instance_name
4053 _CheckNodeOnline(self, self.instance.primary_node)
4055 def Exec(self, feedback_fn):
4056 """Activate the disks.
4059 disks_ok, disks_info = \
4060 _AssembleInstanceDisks(self, self.instance,
4061 ignore_size=self.op.ignore_size)
4063 raise errors.OpExecError("Cannot activate block devices")
4068 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
4070 """Prepare the block devices for an instance.
4072 This sets up the block devices on all nodes.
4074 @type lu: L{LogicalUnit}
4075 @param lu: the logical unit on whose behalf we execute
4076 @type instance: L{objects.Instance}
4077 @param instance: the instance for whose disks we assemble
4078 @type disks: list of L{objects.Disk} or None
4079 @param disks: which disks to assemble (or all, if None)
4080 @type ignore_secondaries: boolean
4081 @param ignore_secondaries: if true, errors on secondary nodes
4082 won't result in an error return from the function
4083 @type ignore_size: boolean
4084 @param ignore_size: if true, the current known size of the disk
4085 will not be used during the disk activation, useful for cases
4086 when the size is wrong
4087 @return: a tuple (disks_ok, device_info); disks_ok is False if the
4088 operation failed, and device_info is a list of
4089 (host, instance_visible_name, node_visible_name) tuples mapping node devices to instance devices
4094 iname = instance.name
4095 disks = _ExpandCheckDisks(instance, disks)
4097 # With the two passes mechanism we try to reduce the window of
4098 # opportunity for the race condition of switching DRBD to primary
4099 # before handshaking occurred, but we do not eliminate it
4101 # The proper fix would be to wait (with some limits) until the
4102 # connection has been made and drbd transitions from WFConnection
4103 # into any other network-connected state (Connected, SyncTarget,
4106 # 1st pass, assemble on all nodes in secondary mode
4107 for inst_disk in disks:
4108 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4110 node_disk = node_disk.Copy()
4111 node_disk.UnsetSize()
4112 lu.cfg.SetDiskID(node_disk, node)
4113 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
4114 msg = result.fail_msg
4116 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4117 " (is_primary=False, pass=1): %s",
4118 inst_disk.iv_name, node, msg)
4119 if not ignore_secondaries:
4122 # FIXME: race condition on drbd migration to primary
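# Clarifying note (added): pass 1 above brings every replica up in secondary
# (read-only) role; pass 2 below then promotes only the devices on the
# instance's primary node.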
4124 # 2nd pass, do only the primary node
4125 for inst_disk in disks:
4128 for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
4129 if node != instance.primary_node:
4132 node_disk = node_disk.Copy()
4133 node_disk.UnsetSize()
4134 lu.cfg.SetDiskID(node_disk, node)
4135 result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
4136 msg = result.fail_msg
4138 lu.proc.LogWarning("Could not prepare block device %s on node %s"
4139 " (is_primary=True, pass=2): %s",
4140 inst_disk.iv_name, node, msg)
4143 dev_path = result.payload
4145 device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4147 # leave the disks configured for the primary node
4148 # this is a workaround that would be better fixed by
4149 # improving the logical/physical id handling
4151 lu.cfg.SetDiskID(disk, instance.primary_node)
4153 return disks_ok, device_info
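# Illustrative note (added, not part of the original code): callers unpack the
# result as "disks_ok, device_info = _AssembleInstanceDisks(lu, instance)" and,
# as LUActivateInstanceDisks above does, raise OpExecError when disks_ok is
# False; each device_info entry is a (node, iv_name, device_path) tuple.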
4156 def _StartInstanceDisks(lu, instance, force):
4157 """Start the disks of an instance.
4160 disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4161 ignore_secondaries=force)
4163 _ShutdownInstanceDisks(lu, instance)
4164 if force is not None and not force:
4165 lu.proc.LogWarning("", hint="If the message above refers to a"
4167 " you can retry the operation using '--force'.")
4168 raise errors.OpExecError("Disk consistency error")
4171 class LUDeactivateInstanceDisks(NoHooksLU):
4172 """Shutdown an instance's disks.
4175 _OP_REQP = ["instance_name"]
4178 def ExpandNames(self):
4179 self._ExpandAndLockInstance()
4180 self.needed_locks[locking.LEVEL_NODE] = []
4181 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4183 def DeclareLocks(self, level):
4184 if level == locking.LEVEL_NODE:
4185 self._LockInstancesNodes()
4187 def CheckPrereq(self):
4188 """Check prerequisites.
4190 This checks that the instance is in the cluster.
4193 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4194 assert self.instance is not None, \
4195 "Cannot retrieve locked instance %s" % self.op.instance_name
4197 def Exec(self, feedback_fn):
4198 """Deactivate the disks.
4201 instance = self.instance
4202 _SafeShutdownInstanceDisks(self, instance)
4205 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4206 """Shutdown block devices of an instance.
4208 This function checks if an instance is running before calling
4209 _ShutdownInstanceDisks.
4212 _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4213 _ShutdownInstanceDisks(lu, instance, disks=disks)
4216 def _ExpandCheckDisks(instance, disks):
4217 """Return the instance disks selected by the disks list
4219 @type disks: list of L{objects.Disk} or None
4220 @param disks: selected disks
4221 @rtype: list of L{objects.Disk}
4222 @return: selected instance disks to act on
4226 return instance.disks
4228 if not set(disks).issubset(instance.disks):
4229 raise errors.ProgrammerError("Can only act on disks belonging to the"
4234 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4235 """Shutdown block devices of an instance.
4237 This does the shutdown on all nodes of the instance.
4239 If ignore_primary is false, errors on the primary node also count as a failure; otherwise they are only logged as warnings.
4244 disks = _ExpandCheckDisks(instance, disks)
4247 for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4248 lu.cfg.SetDiskID(top_disk, node)
4249 result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4250 msg = result.fail_msg
4252 lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4253 disk.iv_name, node, msg)
4254 if not ignore_primary or node != instance.primary_node:
4259 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4260 """Checks if a node has enough free memory.
4262 This function checks if a given node has the needed amount of free
4263 memory. In case the node has less memory or we cannot get the
4264 information from the node, this function raises an OpPrereqError
4267 @type lu: C{LogicalUnit}
4268 @param lu: a logical unit from which we get configuration data
4270 @param node: the node to check
4271 @type reason: C{str}
4272 @param reason: string to use in the error message
4273 @type requested: C{int}
4274 @param requested: the amount of memory in MiB to check for
4275 @type hypervisor_name: C{str}
4276 @param hypervisor_name: the hypervisor to ask for memory stats
4277 @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4278 we cannot check the node
4281 nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4282 nodeinfo[node].Raise("Can't get data from node %s" % node,
4283 prereq=True, ecode=errors.ECODE_ENVIRON)
4284 free_mem = nodeinfo[node].payload.get('memory_free', None)
4285 if not isinstance(free_mem, int):
4286 raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4287 " was '%s'" % (node, free_mem),
4288 errors.ECODE_ENVIRON)
4289 if requested > free_mem:
4290 raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4291 " needed %s MiB, available %s MiB" %
4292 (node, reason, requested, free_mem),
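# Illustrative usage (added for clarity): LUStartupInstance below calls this as
#   _CheckNodeFreeMemory(self, instance.primary_node,
#                        "starting instance %s" % instance.name,
#                        bep[constants.BE_MEMORY], instance.hypervisor)
# where bep is the instance's filled BE parameter dict.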
4296 def _CheckNodesFreeDisk(lu, nodenames, requested):
4297 """Checks if nodes have enough free disk space in the default VG.
4299 This function checks if all given nodes have the needed amount of
4300 free disk. In case any node has less disk or we cannot get the
4301 information from the node, this function raises an OpPrereqError
4304 @type lu: C{LogicalUnit}
4305 @param lu: a logical unit from which we get configuration data
4306 @type nodenames: C{list}
4307 @param nodenames: the list of node names to check
4308 @type requested: C{int}
4309 @param requested: the amount of disk in MiB to check for
4310 @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4311 we cannot check the node
4314 nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4315 lu.cfg.GetHypervisorType())
4316 for node in nodenames:
4317 info = nodeinfo[node]
4318 info.Raise("Cannot get current information from node %s" % node,
4319 prereq=True, ecode=errors.ECODE_ENVIRON)
4320 vg_free = info.payload.get("vg_free", None)
4321 if not isinstance(vg_free, int):
4322 raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4323 " result was '%s'" % (node, vg_free),
4324 errors.ECODE_ENVIRON)
4325 if requested > vg_free:
4326 raise errors.OpPrereqError("Not enough disk space on target node %s:"
4327 " required %d MiB, available %d MiB" %
4328 (node, requested, vg_free),
4332 class LUStartupInstance(LogicalUnit):
4333 """Starts an instance.
4336 HPATH = "instance-start"
4337 HTYPE = constants.HTYPE_INSTANCE
4338 _OP_REQP = ["instance_name", "force"]
4340 ("beparams", _EmptyDict),
4341 ("hvparams", _EmptyDict),
4345 def ExpandNames(self):
4346 self._ExpandAndLockInstance()
4348 def BuildHooksEnv(self):
4351 This runs on master, primary and secondary nodes of the instance.
4355 "FORCE": self.op.force,
4357 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4358 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4361 def CheckPrereq(self):
4362 """Check prerequisites.
4364 This checks that the instance is in the cluster.
4367 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4368 assert self.instance is not None, \
4369 "Cannot retrieve locked instance %s" % self.op.instance_name
4372 if self.op.beparams:
4373 if not isinstance(self.op.beparams, dict):
4374 raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
4375 " dict" % (type(self.op.beparams), ),
4377 # fill the beparams dict
4378 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
4381 if self.op.hvparams:
4382 if not isinstance(self.op.hvparams, dict):
4383 raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
4384 " dict" % (type(self.op.hvparams), ),
4387 # check hypervisor parameter syntax (locally)
4388 cluster = self.cfg.GetClusterInfo()
4389 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
4390 filled_hvp = cluster.FillHV(instance)
4391 filled_hvp.update(self.op.hvparams)
4392 hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4393 hv_type.CheckParameterSyntax(filled_hvp)
4394 _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
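# Note (added): the syntax of the hvparams overrides is validated locally
# above; _CheckHVParams then verifies the resulting filled parameter dict
# against all of the instance's nodes.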
4396 _CheckNodeOnline(self, instance.primary_node)
4398 bep = self.cfg.GetClusterInfo().FillBE(instance)
4399 # check bridges existence
4400 _CheckInstanceBridgesExist(self, instance)
4402 remote_info = self.rpc.call_instance_info(instance.primary_node,
4404 instance.hypervisor)
4405 remote_info.Raise("Error checking node %s" % instance.primary_node,
4406 prereq=True, ecode=errors.ECODE_ENVIRON)
4407 if not remote_info.payload: # not running already
4408 _CheckNodeFreeMemory(self, instance.primary_node,
4409 "starting instance %s" % instance.name,
4410 bep[constants.BE_MEMORY], instance.hypervisor)
4412 def Exec(self, feedback_fn):
4413 """Start the instance.
4416 instance = self.instance
4417 force = self.op.force
4419 self.cfg.MarkInstanceUp(instance.name)
4421 node_current = instance.primary_node
4423 _StartInstanceDisks(self, instance, force)
4425 result = self.rpc.call_instance_start(node_current, instance,
4426 self.op.hvparams, self.op.beparams)
4427 msg = result.fail_msg
4429 _ShutdownInstanceDisks(self, instance)
4430 raise errors.OpExecError("Could not start instance: %s" % msg)
4433 class LURebootInstance(LogicalUnit):
4434 """Reboot an instance.
4437 HPATH = "instance-reboot"
4438 HTYPE = constants.HTYPE_INSTANCE
4439 _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
4440 _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
4443 def CheckArguments(self):
4444 if self.op.reboot_type not in constants.REBOOT_TYPES:
4445 raise errors.OpPrereqError("Invalid reboot type '%s', not one of %s" %
4446 (self.op.reboot_type,
4447 utils.CommaJoin(constants.REBOOT_TYPES)),
4450 def ExpandNames(self):
4451 self._ExpandAndLockInstance()
4453 def BuildHooksEnv(self):
4456 This runs on master, primary and secondary nodes of the instance.
4460 "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4461 "REBOOT_TYPE": self.op.reboot_type,
4462 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
4464 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4465 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4468 def CheckPrereq(self):
4469 """Check prerequisites.
4471 This checks that the instance is in the cluster.
4474 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4475 assert self.instance is not None, \
4476 "Cannot retrieve locked instance %s" % self.op.instance_name
4478 _CheckNodeOnline(self, instance.primary_node)
4480 # check bridges existence
4481 _CheckInstanceBridgesExist(self, instance)
4483 def Exec(self, feedback_fn):
4484 """Reboot the instance.
4487 instance = self.instance
4488 ignore_secondaries = self.op.ignore_secondaries
4489 reboot_type = self.op.reboot_type
4491 node_current = instance.primary_node
4493 if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4494 constants.INSTANCE_REBOOT_HARD]:
4495 for disk in instance.disks:
4496 self.cfg.SetDiskID(disk, node_current)
4497 result = self.rpc.call_instance_reboot(node_current, instance,
4499 self.op.shutdown_timeout)
4500 result.Raise("Could not reboot instance")
4502 result = self.rpc.call_instance_shutdown(node_current, instance,
4503 self.op.shutdown_timeout)
4504 result.Raise("Could not shutdown instance for full reboot")
4505 _ShutdownInstanceDisks(self, instance)
4506 _StartInstanceDisks(self, instance, ignore_secondaries)
4507 result = self.rpc.call_instance_start(node_current, instance, None, None)
4508 msg = result.fail_msg
4510 _ShutdownInstanceDisks(self, instance)
4511 raise errors.OpExecError("Could not start instance for"
4512 " full reboot: %s" % msg)
4514 self.cfg.MarkInstanceUp(instance.name)
4517 class LUShutdownInstance(LogicalUnit):
4518 """Shutdown an instance.
4521 HPATH = "instance-stop"
4522 HTYPE = constants.HTYPE_INSTANCE
4523 _OP_REQP = ["instance_name"]
4524 _OP_DEFS = [("timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
4527 def ExpandNames(self):
4528 self._ExpandAndLockInstance()
4530 def BuildHooksEnv(self):
4533 This runs on master, primary and secondary nodes of the instance.
4536 env = _BuildInstanceHookEnvByObject(self, self.instance)
4537 env["TIMEOUT"] = self.op.timeout
4538 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4541 def CheckPrereq(self):
4542 """Check prerequisites.
4544 This checks that the instance is in the cluster.
4547 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4548 assert self.instance is not None, \
4549 "Cannot retrieve locked instance %s" % self.op.instance_name
4550 _CheckNodeOnline(self, self.instance.primary_node)
4552 def Exec(self, feedback_fn):
4553 """Shutdown the instance.
4556 instance = self.instance
4557 node_current = instance.primary_node
4558 timeout = self.op.timeout
4559 self.cfg.MarkInstanceDown(instance.name)
4560 result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4561 msg = result.fail_msg
4563 self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4565 _ShutdownInstanceDisks(self, instance)
4568 class LUReinstallInstance(LogicalUnit):
4569 """Reinstall an instance.
4572 HPATH = "instance-reinstall"
4573 HTYPE = constants.HTYPE_INSTANCE
4574 _OP_REQP = ["instance_name"]
4577 ("force_variant", False),
4581 def ExpandNames(self):
4582 self._ExpandAndLockInstance()
4584 def BuildHooksEnv(self):
4587 This runs on master, primary and secondary nodes of the instance.
4590 env = _BuildInstanceHookEnvByObject(self, self.instance)
4591 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4594 def CheckPrereq(self):
4595 """Check prerequisites.
4597 This checks that the instance is in the cluster and is not running.
4600 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4601 assert instance is not None, \
4602 "Cannot retrieve locked instance %s" % self.op.instance_name
4603 _CheckNodeOnline(self, instance.primary_node)
4605 if instance.disk_template == constants.DT_DISKLESS:
4606 raise errors.OpPrereqError("Instance '%s' has no disks" %
4607 self.op.instance_name,
4609 _CheckInstanceDown(self, instance, "cannot reinstall")
4611 if self.op.os_type is not None:
4613 pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4614 _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4616 self.instance = instance
4618 def Exec(self, feedback_fn):
4619 """Reinstall the instance.
4622 inst = self.instance
4624 if self.op.os_type is not None:
4625 feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4626 inst.os = self.op.os_type
4627 self.cfg.Update(inst, feedback_fn)
4629 _StartInstanceDisks(self, inst, None)
4631 feedback_fn("Running the instance OS create scripts...")
4632 # FIXME: pass debug option from opcode to backend
4633 result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4634 self.op.debug_level)
4635 result.Raise("Could not install OS for instance %s on node %s" %
4636 (inst.name, inst.primary_node))
4638 _ShutdownInstanceDisks(self, inst)
4641 class LURecreateInstanceDisks(LogicalUnit):
4642 """Recreate an instance's missing disks.
4645 HPATH = "instance-recreate-disks"
4646 HTYPE = constants.HTYPE_INSTANCE
4647 _OP_REQP = ["instance_name", "disks"]
4650 def CheckArguments(self):
4651 """Check the arguments.
4654 if not isinstance(self.op.disks, list):
4655 raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
4656 for item in self.op.disks:
4657 if (not isinstance(item, int) or
4659 raise errors.OpPrereqError("Invalid disk specification '%s'" %
4660 str(item), errors.ECODE_INVAL)
4662 def ExpandNames(self):
4663 self._ExpandAndLockInstance()
4665 def BuildHooksEnv(self):
4668 This runs on master, primary and secondary nodes of the instance.
4671 env = _BuildInstanceHookEnvByObject(self, self.instance)
4672 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4675 def CheckPrereq(self):
4676 """Check prerequisites.
4678 This checks that the instance is in the cluster and is not running.
4681 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4682 assert instance is not None, \
4683 "Cannot retrieve locked instance %s" % self.op.instance_name
4684 _CheckNodeOnline(self, instance.primary_node)
4686 if instance.disk_template == constants.DT_DISKLESS:
4687 raise errors.OpPrereqError("Instance '%s' has no disks" %
4688 self.op.instance_name, errors.ECODE_INVAL)
4689 _CheckInstanceDown(self, instance, "cannot recreate disks")
4691 if not self.op.disks:
4692 self.op.disks = range(len(instance.disks))
4694 for idx in self.op.disks:
4695 if idx >= len(instance.disks):
4696 raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4699 self.instance = instance
4701 def Exec(self, feedback_fn):
4702 """Recreate the disks.
4706 for idx, _ in enumerate(self.instance.disks):
4707 if idx not in self.op.disks: # disk idx has not been passed in
4711 _CreateDisks(self, self.instance, to_skip=to_skip)
4714 class LURenameInstance(LogicalUnit):
4715 """Rename an instance.
4718 HPATH = "instance-rename"
4719 HTYPE = constants.HTYPE_INSTANCE
4720 _OP_REQP = ["instance_name", "new_name"]
4721 _OP_DEFS = [("ignore_ip", False)]
4723 def BuildHooksEnv(self):
4726 This runs on master, primary and secondary nodes of the instance.
4729 env = _BuildInstanceHookEnvByObject(self, self.instance)
4730 env["INSTANCE_NEW_NAME"] = self.op.new_name
4731 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4734 def CheckPrereq(self):
4735 """Check prerequisites.
4737 This checks that the instance is in the cluster and is not running.
4740 self.op.instance_name = _ExpandInstanceName(self.cfg,
4741 self.op.instance_name)
4742 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4743 assert instance is not None
4744 _CheckNodeOnline(self, instance.primary_node)
4745 _CheckInstanceDown(self, instance, "cannot rename")
4746 self.instance = instance
4748 # new name verification
4749 name_info = utils.GetHostInfo(self.op.new_name)
4751 self.op.new_name = new_name = name_info.name
4752 instance_list = self.cfg.GetInstanceList()
4753 if new_name in instance_list:
4754 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4755 new_name, errors.ECODE_EXISTS)
4757 if not self.op.ignore_ip:
4758 if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
4759 raise errors.OpPrereqError("IP %s of instance %s already in use" %
4760 (name_info.ip, new_name),
4761 errors.ECODE_NOTUNIQUE)
4764 def Exec(self, feedback_fn):
4765 """Rename the instance.
4768 inst = self.instance
4769 old_name = inst.name
4771 if inst.disk_template == constants.DT_FILE:
4772 old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4774 self.cfg.RenameInstance(inst.name, self.op.new_name)
4775 # Change the instance lock. This is definitely safe while we hold the BGL
4776 self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4777 self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4779 # re-read the instance from the configuration after rename
4780 inst = self.cfg.GetInstanceInfo(self.op.new_name)
4782 if inst.disk_template == constants.DT_FILE:
4783 new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4784 result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4785 old_file_storage_dir,
4786 new_file_storage_dir)
4787 result.Raise("Could not rename on node %s directory '%s' to '%s'"
4788 " (but the instance has been renamed in Ganeti)" %
4789 (inst.primary_node, old_file_storage_dir,
4790 new_file_storage_dir))
4792 _StartInstanceDisks(self, inst, None)
4794 result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4795 old_name, self.op.debug_level)
4796 msg = result.fail_msg
4798 msg = ("Could not run OS rename script for instance %s on node %s"
4799 " (but the instance has been renamed in Ganeti): %s" %
4800 (inst.name, inst.primary_node, msg))
4801 self.proc.LogWarning(msg)
4803 _ShutdownInstanceDisks(self, inst)
4806 class LURemoveInstance(LogicalUnit):
4807 """Remove an instance.
4810 HPATH = "instance-remove"
4811 HTYPE = constants.HTYPE_INSTANCE
4812 _OP_REQP = ["instance_name", "ignore_failures"]
4813 _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
4816 def ExpandNames(self):
4817 self._ExpandAndLockInstance()
4818 self.needed_locks[locking.LEVEL_NODE] = []
4819 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4821 def DeclareLocks(self, level):
4822 if level == locking.LEVEL_NODE:
4823 self._LockInstancesNodes()
4825 def BuildHooksEnv(self):
4828 This runs on master, primary and secondary nodes of the instance.
4831 env = _BuildInstanceHookEnvByObject(self, self.instance)
4832 env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
4833 nl = [self.cfg.GetMasterNode()]
4834 nl_post = list(self.instance.all_nodes) + nl
4835 return env, nl, nl_post
4837 def CheckPrereq(self):
4838 """Check prerequisites.
4840 This checks that the instance is in the cluster.
4843 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4844 assert self.instance is not None, \
4845 "Cannot retrieve locked instance %s" % self.op.instance_name
4847 def Exec(self, feedback_fn):
4848 """Remove the instance.
4851 instance = self.instance
4852 logging.info("Shutting down instance %s on node %s",
4853 instance.name, instance.primary_node)
4855 result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4856 self.op.shutdown_timeout)
4857 msg = result.fail_msg
4859 if self.op.ignore_failures:
4860 feedback_fn("Warning: can't shutdown instance: %s" % msg)
4862 raise errors.OpExecError("Could not shutdown instance %s on"
4864 (instance.name, instance.primary_node, msg))
4866 _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
4869 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
4870 """Utility function to remove an instance.
4873 logging.info("Removing block devices for instance %s", instance.name)
4875 if not _RemoveDisks(lu, instance):
4876 if not ignore_failures:
4877 raise errors.OpExecError("Can't remove instance's disks")
4878 feedback_fn("Warning: can't remove instance's disks")
4880 logging.info("Removing instance %s out of cluster config", instance.name)
4882 lu.cfg.RemoveInstance(instance.name)
4884 assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
4885 "Instance lock removal conflict"
4887 # Remove lock for the instance
4888 lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4891 class LUQueryInstances(NoHooksLU):
4892 """Logical unit for querying instances.
4895 # pylint: disable-msg=W0142
4896 _OP_REQP = ["output_fields", "names", "use_locking"]
4898 _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4899 "serial_no", "ctime", "mtime", "uuid"]
4900 _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4902 "disk_template", "ip", "mac", "bridge",
4903 "nic_mode", "nic_link",
4904 "sda_size", "sdb_size", "vcpus", "tags",
4905 "network_port", "beparams",
4906 r"(disk)\.(size)/([0-9]+)",
4907 r"(disk)\.(sizes)", "disk_usage",
4908 r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4909 r"(nic)\.(bridge)/([0-9]+)",
4910 r"(nic)\.(macs|ips|modes|links|bridges)",
4911 r"(disk|nic)\.(count)",
4913 ] + _SIMPLE_FIELDS +
4915 for name in constants.HVS_PARAMETERS
4916 if name not in constants.HVC_GLOBALS] +
4918 for name in constants.BES_PARAMETERS])
4919 _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
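# Clarifying note (added): parameterized fields such as "disk.size/0" or
# "nic.mac/1" are matched by the regular expressions in _FIELDS_STATIC above;
# the trailing number selects the disk or NIC index handled in Exec below.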
4922 def CheckArguments(self):
4923 _CheckOutputFields(static=self._FIELDS_STATIC,
4924 dynamic=self._FIELDS_DYNAMIC,
4925 selected=self.op.output_fields)
4927 def ExpandNames(self):
4928 self.needed_locks = {}
4929 self.share_locks[locking.LEVEL_INSTANCE] = 1
4930 self.share_locks[locking.LEVEL_NODE] = 1
4933 self.wanted = _GetWantedInstances(self, self.op.names)
4935 self.wanted = locking.ALL_SET
4937 self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
4938 self.do_locking = self.do_node_query and self.op.use_locking
4940 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4941 self.needed_locks[locking.LEVEL_NODE] = []
4942 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4944 def DeclareLocks(self, level):
4945 if level == locking.LEVEL_NODE and self.do_locking:
4946 self._LockInstancesNodes()
4948 def CheckPrereq(self):
4949 """Check prerequisites.
4954 def Exec(self, feedback_fn):
4955 """Computes the list of instances and their attributes.
4958 # pylint: disable-msg=R0912
4959 # way too many branches here
4960 all_info = self.cfg.GetAllInstancesInfo()
4961 if self.wanted == locking.ALL_SET:
4962 # caller didn't specify instance names, so ordering is not important
4964 instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4966 instance_names = all_info.keys()
4967 instance_names = utils.NiceSort(instance_names)
4969 # caller did specify names, so we must keep the ordering
4971 tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
4973 tgt_set = all_info.keys()
4974 missing = set(self.wanted).difference(tgt_set)
4976 raise errors.OpExecError("Some instances were removed before"
4977 " retrieving their data: %s" % missing)
4978 instance_names = self.wanted
4980 instance_list = [all_info[iname] for iname in instance_names]
4982 # begin data gathering
4984 nodes = frozenset([inst.primary_node for inst in instance_list])
4985 hv_list = list(set([inst.hypervisor for inst in instance_list]))
4989 if self.do_node_query:
4991 node_data = self.rpc.call_all_instances_info(nodes, hv_list)
4993 result = node_data[name]
4995 # offline nodes will be in both lists
4996 off_nodes.append(name)
4998 bad_nodes.append(name)
5001 live_data.update(result.payload)
5002 # else no instance is alive
5004 live_data = dict([(name, {}) for name in instance_names])
5006 # end data gathering
5011 cluster = self.cfg.GetClusterInfo()
5012 for instance in instance_list:
5014 i_hv = cluster.FillHV(instance, skip_globals=True)
5015 i_be = cluster.FillBE(instance)
5016 i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
5017 for field in self.op.output_fields:
5018 st_match = self._FIELDS_STATIC.Matches(field)
5019 if field in self._SIMPLE_FIELDS:
5020 val = getattr(instance, field)
5021 elif field == "pnode":
5022 val = instance.primary_node
5023 elif field == "snodes":
5024 val = list(instance.secondary_nodes)
5025 elif field == "admin_state":
5026 val = instance.admin_up
5027 elif field == "oper_state":
5028 if instance.primary_node in bad_nodes:
5031 val = bool(live_data.get(instance.name))
5032 elif field == "status":
5033 if instance.primary_node in off_nodes:
5034 val = "ERROR_nodeoffline"
5035 elif instance.primary_node in bad_nodes:
5036 val = "ERROR_nodedown"
5038 running = bool(live_data.get(instance.name))
5040 if instance.admin_up:
5045 if instance.admin_up:
5049 elif field == "oper_ram":
5050 if instance.primary_node in bad_nodes:
5052 elif instance.name in live_data:
5053 val = live_data[instance.name].get("memory", "?")
5056 elif field == "vcpus":
5057 val = i_be[constants.BE_VCPUS]
5058 elif field == "disk_template":
5059 val = instance.disk_template
5062 val = instance.nics[0].ip
5065 elif field == "nic_mode":
5067 val = i_nicp[0][constants.NIC_MODE]
5070 elif field == "nic_link":
5072 val = i_nicp[0][constants.NIC_LINK]
5075 elif field == "bridge":
5076 if (instance.nics and
5077 i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
5078 val = i_nicp[0][constants.NIC_LINK]
5081 elif field == "mac":
5083 val = instance.nics[0].mac
5086 elif field == "sda_size" or field == "sdb_size":
5087 idx = ord(field[2]) - ord('a')
5089 val = instance.FindDisk(idx).size
5090 except errors.OpPrereqError:
5092 elif field == "disk_usage": # total disk usage per node
5093 disk_sizes = [{'size': disk.size} for disk in instance.disks]
5094 val = _ComputeDiskSize(instance.disk_template, disk_sizes)
5095 elif field == "tags":
5096 val = list(instance.GetTags())
5097 elif field == "hvparams":
5099 elif (field.startswith(HVPREFIX) and
5100 field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
5101 field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
5102 val = i_hv.get(field[len(HVPREFIX):], None)
5103 elif field == "beparams":
5105 elif (field.startswith(BEPREFIX) and
5106 field[len(BEPREFIX):] in constants.BES_PARAMETERS):
5107 val = i_be.get(field[len(BEPREFIX):], None)
5108 elif st_match and st_match.groups():
5109 # matches a variable list
5110 st_groups = st_match.groups()
5111 if st_groups and st_groups[0] == "disk":
5112 if st_groups[1] == "count":
5113 val = len(instance.disks)
5114 elif st_groups[1] == "sizes":
5115 val = [disk.size for disk in instance.disks]
5116 elif st_groups[1] == "size":
5118 val = instance.FindDisk(st_groups[2]).size
5119 except errors.OpPrereqError:
5122 assert False, "Unhandled disk parameter"
5123 elif st_groups[0] == "nic":
5124 if st_groups[1] == "count":
5125 val = len(instance.nics)
5126 elif st_groups[1] == "macs":
5127 val = [nic.mac for nic in instance.nics]
5128 elif st_groups[1] == "ips":
5129 val = [nic.ip for nic in instance.nics]
5130 elif st_groups[1] == "modes":
5131 val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
5132 elif st_groups[1] == "links":
5133 val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
5134 elif st_groups[1] == "bridges":
5137 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
5138 val.append(nicp[constants.NIC_LINK])
5143 nic_idx = int(st_groups[2])
5144 if nic_idx >= len(instance.nics):
5147 if st_groups[1] == "mac":
5148 val = instance.nics[nic_idx].mac
5149 elif st_groups[1] == "ip":
5150 val = instance.nics[nic_idx].ip
5151 elif st_groups[1] == "mode":
5152 val = i_nicp[nic_idx][constants.NIC_MODE]
5153 elif st_groups[1] == "link":
5154 val = i_nicp[nic_idx][constants.NIC_LINK]
5155 elif st_groups[1] == "bridge":
5156 nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
5157 if nic_mode == constants.NIC_MODE_BRIDGED:
5158 val = i_nicp[nic_idx][constants.NIC_LINK]
5162 assert False, "Unhandled NIC parameter"
5164 assert False, ("Declared but unhandled variable parameter '%s'" %
5167 assert False, "Declared but unhandled parameter '%s'" % field
5174 class LUFailoverInstance(LogicalUnit):
5175 """Failover an instance.
5178 HPATH = "instance-failover"
5179 HTYPE = constants.HTYPE_INSTANCE
5180 _OP_REQP = ["instance_name", "ignore_consistency"]
5181 _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
5184 def ExpandNames(self):
5185 self._ExpandAndLockInstance()
5186 self.needed_locks[locking.LEVEL_NODE] = []
5187 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5189 def DeclareLocks(self, level):
5190 if level == locking.LEVEL_NODE:
5191 self._LockInstancesNodes()
5193 def BuildHooksEnv(self):
5196 This runs on master, primary and secondary nodes of the instance.
5199 instance = self.instance
5200 source_node = instance.primary_node
5201 target_node = instance.secondary_nodes[0]
5203 "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5204 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5205 "OLD_PRIMARY": source_node,
5206 "OLD_SECONDARY": target_node,
5207 "NEW_PRIMARY": target_node,
5208 "NEW_SECONDARY": source_node,
5210 env.update(_BuildInstanceHookEnvByObject(self, instance))
5211 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5213 nl_post.append(source_node)
5214 return env, nl, nl_post
5216 def CheckPrereq(self):
5217 """Check prerequisites.
5219 This checks that the instance is in the cluster.
5222 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5223 assert self.instance is not None, \
5224 "Cannot retrieve locked instance %s" % self.op.instance_name
5226 bep = self.cfg.GetClusterInfo().FillBE(instance)
5227 if instance.disk_template not in constants.DTS_NET_MIRROR:
5228 raise errors.OpPrereqError("Instance's disk layout is not"
5229 " network mirrored, cannot failover.",
5232 secondary_nodes = instance.secondary_nodes
5233 if not secondary_nodes:
5234 raise errors.ProgrammerError("no secondary node but using "
5235 "a mirrored disk template")
5237 target_node = secondary_nodes[0]
5238 _CheckNodeOnline(self, target_node)
5239 _CheckNodeNotDrained(self, target_node)
5240 if instance.admin_up:
5241 # check memory requirements on the secondary node
5242 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5243 instance.name, bep[constants.BE_MEMORY],
5244 instance.hypervisor)
5246 self.LogInfo("Not checking memory on the secondary node as"
5247 " instance will not be started")
5249 # check bridge existence
5250 _CheckInstanceBridgesExist(self, instance, node=target_node)
5252 def Exec(self, feedback_fn):
5253 """Failover an instance.
5255 The failover is done by shutting it down on its present node and
5256 starting it on the secondary.
5259 instance = self.instance
5261 source_node = instance.primary_node
5262 target_node = instance.secondary_nodes[0]
5264 if instance.admin_up:
5265 feedback_fn("* checking disk consistency between source and target")
5266 for dev in instance.disks:
5267 # for drbd, these are drbd over lvm
5268 if not _CheckDiskConsistency(self, dev, target_node, False):
5269 if not self.op.ignore_consistency:
5270 raise errors.OpExecError("Disk %s is degraded on target node,"
5271 " aborting failover." % dev.iv_name)
5273 feedback_fn("* not checking disk consistency as instance is not running")
5275 feedback_fn("* shutting down instance on source node")
5276 logging.info("Shutting down instance %s on node %s",
5277 instance.name, source_node)
5279 result = self.rpc.call_instance_shutdown(source_node, instance,
5280 self.op.shutdown_timeout)
5281 msg = result.fail_msg
5283 if self.op.ignore_consistency:
5284 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5285 " Proceeding anyway. Please make sure node"
5286 " %s is down. Error details: %s",
5287 instance.name, source_node, source_node, msg)
5289 raise errors.OpExecError("Could not shutdown instance %s on"
5291 (instance.name, source_node, msg))
5293 feedback_fn("* deactivating the instance's disks on source node")
5294 if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5295 raise errors.OpExecError("Can't shut down the instance's disks.")
5297 instance.primary_node = target_node
5298 # distribute new instance config to the other nodes
5299 self.cfg.Update(instance, feedback_fn)
5301 # Only start the instance if it's marked as up
5302 if instance.admin_up:
5303 feedback_fn("* activating the instance's disks on target node")
5304 logging.info("Starting instance %s on node %s",
5305 instance.name, target_node)
5307 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5308 ignore_secondaries=True)
5310 _ShutdownInstanceDisks(self, instance)
5311 raise errors.OpExecError("Can't activate the instance's disks")
5313 feedback_fn("* starting the instance on the target node")
5314 result = self.rpc.call_instance_start(target_node, instance, None, None)
5315 msg = result.fail_msg
5317 _ShutdownInstanceDisks(self, instance)
5318 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5319 (instance.name, target_node, msg))
5322 class LUMigrateInstance(LogicalUnit):
5323 """Migrate an instance.
5325 This is migration without shutting down, as opposed to failover,
5326 which is done with a shutdown.
5329 HPATH = "instance-migrate"
5330 HTYPE = constants.HTYPE_INSTANCE
5331 _OP_REQP = ["instance_name", "live", "cleanup"]
5335 def ExpandNames(self):
5336 self._ExpandAndLockInstance()
5338 self.needed_locks[locking.LEVEL_NODE] = []
5339 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5341 self._migrater = TLMigrateInstance(self, self.op.instance_name,
5342 self.op.live, self.op.cleanup)
5343 self.tasklets = [self._migrater]
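# Note (added): with self.tasklets set, prerequisite checking and execution are
# delegated to the TLMigrateInstance tasklet defined further below; this LU
# only handles locking and the hook environment.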
5345 def DeclareLocks(self, level):
5346 if level == locking.LEVEL_NODE:
5347 self._LockInstancesNodes()
5349 def BuildHooksEnv(self):
5352 This runs on master, primary and secondary nodes of the instance.
5355 instance = self._migrater.instance
5356 source_node = instance.primary_node
5357 target_node = instance.secondary_nodes[0]
5358 env = _BuildInstanceHookEnvByObject(self, instance)
5359 env["MIGRATE_LIVE"] = self.op.live
5360 env["MIGRATE_CLEANUP"] = self.op.cleanup
5362 "OLD_PRIMARY": source_node,
5363 "OLD_SECONDARY": target_node,
5364 "NEW_PRIMARY": target_node,
5365 "NEW_SECONDARY": source_node,
5367 nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5369 nl_post.append(source_node)
5370 return env, nl, nl_post
5373 class LUMoveInstance(LogicalUnit):
5374 """Move an instance by data-copying.
5377 HPATH = "instance-move"
5378 HTYPE = constants.HTYPE_INSTANCE
5379 _OP_REQP = ["instance_name", "target_node"]
5380 _OP_DEFS = [("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT)]
5383 def ExpandNames(self):
5384 self._ExpandAndLockInstance()
5385 target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5386 self.op.target_node = target_node
5387 self.needed_locks[locking.LEVEL_NODE] = [target_node]
5388 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5390 def DeclareLocks(self, level):
5391 if level == locking.LEVEL_NODE:
5392 self._LockInstancesNodes(primary_only=True)
5394 def BuildHooksEnv(self):
5397 This runs on master, primary and secondary nodes of the instance.
5401 "TARGET_NODE": self.op.target_node,
5402 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5404 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5405 nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5406 self.op.target_node]
5409 def CheckPrereq(self):
5410 """Check prerequisites.
5412 This checks that the instance is in the cluster.
5415 self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5416 assert self.instance is not None, \
5417 "Cannot retrieve locked instance %s" % self.op.instance_name
5419 node = self.cfg.GetNodeInfo(self.op.target_node)
5420 assert node is not None, \
5421 "Cannot retrieve locked node %s" % self.op.target_node
5423 self.target_node = target_node = node.name
5425 if target_node == instance.primary_node:
5426 raise errors.OpPrereqError("Instance %s is already on the node %s" %
5427 (instance.name, target_node),
5430 bep = self.cfg.GetClusterInfo().FillBE(instance)
5432 for idx, dsk in enumerate(instance.disks):
5433 if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5434 raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5435 " cannot copy" % idx, errors.ECODE_STATE)
5437 _CheckNodeOnline(self, target_node)
5438 _CheckNodeNotDrained(self, target_node)
5440 if instance.admin_up:
5441 # check memory requirements on the secondary node
5442 _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5443 instance.name, bep[constants.BE_MEMORY],
5444 instance.hypervisor)
5446 self.LogInfo("Not checking memory on the secondary node as"
5447 " instance will not be started")
5449 # check bridge existence
5450 _CheckInstanceBridgesExist(self, instance, node=target_node)
5452 def Exec(self, feedback_fn):
5453 """Move an instance.
5455 The move is done by shutting it down on its present node, copying
5456 the data over (slow) and starting it on the new node.
5459 instance = self.instance
5461 source_node = instance.primary_node
5462 target_node = self.target_node
5464 self.LogInfo("Shutting down instance %s on source node %s",
5465 instance.name, source_node)
5467 result = self.rpc.call_instance_shutdown(source_node, instance,
5468 self.op.shutdown_timeout)
5469 msg = result.fail_msg
5471 if self.op.ignore_consistency:
5472 self.proc.LogWarning("Could not shutdown instance %s on node %s."
5473 " Proceeding anyway. Please make sure node"
5474 " %s is down. Error details: %s",
5475 instance.name, source_node, source_node, msg)
5477 raise errors.OpExecError("Could not shutdown instance %s on"
5479 (instance.name, source_node, msg))
5481 # create the target disks
5483 _CreateDisks(self, instance, target_node=target_node)
5484 except errors.OpExecError:
5485 self.LogWarning("Device creation failed, reverting...")
5487 _RemoveDisks(self, instance, target_node=target_node)
5489 self.cfg.ReleaseDRBDMinors(instance.name)
5492 cluster_name = self.cfg.GetClusterInfo().cluster_name
5495 # activate, get path, copy the data over
5496 for idx, disk in enumerate(instance.disks):
5497 self.LogInfo("Copying data for disk %d", idx)
5498 result = self.rpc.call_blockdev_assemble(target_node, disk,
5499 instance.name, True)
5501 self.LogWarning("Can't assemble newly created disk %d: %s",
5502 idx, result.fail_msg)
5503 errs.append(result.fail_msg)
5505 dev_path = result.payload
5506 result = self.rpc.call_blockdev_export(source_node, disk,
5507 target_node, dev_path,
5510 self.LogWarning("Can't copy data over for disk %d: %s",
5511 idx, result.fail_msg)
5512 errs.append(result.fail_msg)
5516 self.LogWarning("Some disks failed to copy, aborting")
5518 _RemoveDisks(self, instance, target_node=target_node)
5520 self.cfg.ReleaseDRBDMinors(instance.name)
5521 raise errors.OpExecError("Errors during disk copy: %s" %
5524 instance.primary_node = target_node
5525 self.cfg.Update(instance, feedback_fn)
5527 self.LogInfo("Removing the disks on the original node")
5528 _RemoveDisks(self, instance, target_node=source_node)
5530 # Only start the instance if it's marked as up
5531 if instance.admin_up:
5532 self.LogInfo("Starting instance %s on node %s",
5533 instance.name, target_node)
5535 disks_ok, _ = _AssembleInstanceDisks(self, instance,
5536 ignore_secondaries=True)
5538 _ShutdownInstanceDisks(self, instance)
5539 raise errors.OpExecError("Can't activate the instance's disks")
5541 result = self.rpc.call_instance_start(target_node, instance, None, None)
5542 msg = result.fail_msg
5544 _ShutdownInstanceDisks(self, instance)
5545 raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5546 (instance.name, target_node, msg))
5549 class LUMigrateNode(LogicalUnit):
5550 """Migrate all instances from a node.
5553 HPATH = "node-migrate"
5554 HTYPE = constants.HTYPE_NODE
5555 _OP_REQP = ["node_name", "live"]
5558 def ExpandNames(self):
5559 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5561 self.needed_locks = {
5562 locking.LEVEL_NODE: [self.op.node_name],
5565 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5567 # Create tasklets for migrating instances for all instances on this node
5571 for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5572 logging.debug("Migrating instance %s", inst.name)
5573 names.append(inst.name)
5575 tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
5577 self.tasklets = tasklets
5579 # Declare instance locks
5580 self.needed_locks[locking.LEVEL_INSTANCE] = names
5582 def DeclareLocks(self, level):
5583 if level == locking.LEVEL_NODE:
5584 self._LockInstancesNodes()
5586 def BuildHooksEnv(self):
5589 This runs on the master, the primary and all the secondaries.
5593 "NODE_NAME": self.op.node_name,
5596 nl = [self.cfg.GetMasterNode()]
5598 return (env, nl, nl)
5601 class TLMigrateInstance(Tasklet):
5602 def __init__(self, lu, instance_name, live, cleanup):
5603 """Initializes this class.
5606 Tasklet.__init__(self, lu)
5609 self.instance_name = instance_name
5611 self.cleanup = cleanup
5613 def CheckPrereq(self):
5614 """Check prerequisites.
5616 This checks that the instance is in the cluster.
5619 instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5620 instance = self.cfg.GetInstanceInfo(instance_name)
5621 assert instance is not None
5623 if instance.disk_template != constants.DT_DRBD8:
5624 raise errors.OpPrereqError("Instance's disk layout is not"
5625 " drbd8, cannot migrate.", errors.ECODE_STATE)
5627 secondary_nodes = instance.secondary_nodes
5628 if not secondary_nodes:
5629 raise errors.ConfigurationError("No secondary node but using"
5630 " drbd8 disk template")
5632 i_be = self.cfg.GetClusterInfo().FillBE(instance)
5634 target_node = secondary_nodes[0]
5635 # check memory requirements on the secondary node
5636 _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5637 instance.name, i_be[constants.BE_MEMORY],
5638 instance.hypervisor)
5640 # check bridge existence
5641 _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5643 if not self.cleanup:
5644 _CheckNodeNotDrained(self.lu, target_node)
5645 result = self.rpc.call_instance_migratable(instance.primary_node,
5647 result.Raise("Can't migrate, please use failover",
5648 prereq=True, ecode=errors.ECODE_STATE)
5650 self.instance = instance
5652 def _WaitUntilSync(self):
5653 """Poll with custom rpc for disk sync.
5655 This uses our own step-based rpc call.
5658 self.feedback_fn("* wait until resync is done")
5662 result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5664 self.instance.disks)
5666 for node, nres in result.items():
5667 nres.Raise("Cannot resync disks on node %s" % node)
5668 node_done, node_percent = nres.payload
5669 all_done = all_done and node_done
5670 if node_percent is not None:
5671 min_percent = min(min_percent, node_percent)
5673 if min_percent < 100:
5674 self.feedback_fn(" - progress: %.1f%%" % min_percent)
5677 def _EnsureSecondary(self, node):
5678 """Demote a node to secondary.
5681 self.feedback_fn("* switching node %s to secondary mode" % node)
5683 for dev in self.instance.disks:
5684 self.cfg.SetDiskID(dev, node)
5686 result = self.rpc.call_blockdev_close(node, self.instance.name,
5687 self.instance.disks)
5688 result.Raise("Cannot change disk to secondary on node %s" % node)
5690 def _GoStandalone(self):
5691 """Disconnect from the network.
5694 self.feedback_fn("* changing into standalone mode")
5695 result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5696 self.instance.disks)
5697 for node, nres in result.items():
5698 nres.Raise("Cannot disconnect disks node %s" % node)
5700 def _GoReconnect(self, multimaster):
5701 """Reconnect to the network.
5707 msg = "single-master"
5708 self.feedback_fn("* changing disks into %s mode" % msg)
5709 result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5710 self.instance.disks,
5711 self.instance.name, multimaster)
5712 for node, nres in result.items():
5713 nres.Raise("Cannot change disks config on node %s" % node)
5715 def _ExecCleanup(self):
5716 """Try to clean up after a failed migration.
5718 The cleanup is done by:
5719 - check that the instance is running only on one node
5720 (and update the config if needed)
5721 - change disks on its secondary node to secondary
5722 - wait until disks are fully synchronized
5723 - disconnect from the network
5724 - change disks into single-master mode
5725 - wait again until disks are fully synchronized
5728 instance = self.instance
5729 target_node = self.target_node
5730 source_node = self.source_node
5732 # check running on only one node
5733 self.feedback_fn("* checking where the instance actually runs"
5734 " (if this hangs, the hypervisor might be in"
5736 ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5737 for node, result in ins_l.items():
5738 result.Raise("Can't contact node %s" % node)
5740 runningon_source = instance.name in ins_l[source_node].payload
5741 runningon_target = instance.name in ins_l[target_node].payload
5743 if runningon_source and runningon_target:
5744 raise errors.OpExecError("Instance seems to be running on two nodes,"
5745 " or the hypervisor is confused. You will have"
5746 " to ensure manually that it runs only on one"
5747 " and restart this operation.")
5749 if not (runningon_source or runningon_target):
5750 raise errors.OpExecError("Instance does not seem to be running at all."
5751 " In this case, it's safer to repair by"
5752 " running 'gnt-instance stop' to ensure disk"
5753 " shutdown, and then restarting it.")
5755 if runningon_target:
5756 # the migration has actually succeeded, we need to update the config
5757 self.feedback_fn("* instance running on secondary node (%s),"
5758 " updating config" % target_node)
5759 instance.primary_node = target_node
5760 self.cfg.Update(instance, self.feedback_fn)
5761 demoted_node = source_node
5763 self.feedback_fn("* instance confirmed to be running on its"
5764 " primary node (%s)" % source_node)
5765 demoted_node = target_node
5767 self._EnsureSecondary(demoted_node)
5769 self._WaitUntilSync()
5770 except errors.OpExecError:
5771 # we ignore here errors, since if the device is standalone, it
5772 # won't be able to sync
5774 self._GoStandalone()
5775 self._GoReconnect(False)
5776 self._WaitUntilSync()
5778 self.feedback_fn("* done")
5780 def _RevertDiskStatus(self):
5781 """Try to revert the disk status after a failed migration.
5784 target_node = self.target_node
5786 self._EnsureSecondary(target_node)
5787 self._GoStandalone()
5788 self._GoReconnect(False)
5789 self._WaitUntilSync()
5790 except errors.OpExecError, err:
5791 self.lu.LogWarning("Migration failed and I can't reconnect the"
5792 " drives: error '%s'\n"
5793 "Please look and recover the instance status" %
5796 def _AbortMigration(self):
5797 """Call the hypervisor code to abort a started migration.
5800 instance = self.instance
5801 target_node = self.target_node
5802 migration_info = self.migration_info
5804 abort_result = self.rpc.call_finalize_migration(target_node,
5808 abort_msg = abort_result.fail_msg
5810 logging.error("Aborting migration failed on target node %s: %s",
5811 target_node, abort_msg)
5812 # Don't raise an exception here, as we still have to try to revert the
5813 # disk status, even if this step failed.
5815 def _ExecMigration(self):
5816 """Migrate an instance.
5818 The migrate is done by:
5819 - change the disks into dual-master mode
5820 - wait until disks are fully synchronized again
5821 - migrate the instance
5822 - change disks on the new secondary node (the old primary) to secondary
5823 - wait until disks are fully synchronized
5824 - change disks into single-master mode
5827 instance = self.instance
5828 target_node = self.target_node
5829 source_node = self.source_node
5831 self.feedback_fn("* checking disk consistency between source and target")
5832 for dev in instance.disks:
5833 if not _CheckDiskConsistency(self.lu, dev, target_node, False):
5834 raise errors.OpExecError("Disk %s is degraded or not fully"
5835 " synchronized on target node,"
5836 " aborting migration." % dev.iv_name)
5838 # First get the migration information from the remote node
5839 result = self.rpc.call_migration_info(source_node, instance)
5840 msg = result.fail_msg
5842 log_err = ("Failed fetching source migration information from %s: %s" %
5844 logging.error(log_err)
5845 raise errors.OpExecError(log_err)
5847 self.migration_info = migration_info = result.payload
5849 # Then switch the disks to master/master mode
5850 self._EnsureSecondary(target_node)
5851 self._GoStandalone()
5852 self._GoReconnect(True)
5853 self._WaitUntilSync()
5855 self.feedback_fn("* preparing %s to accept the instance" % target_node)
5856 result = self.rpc.call_accept_instance(target_node,
5859 self.nodes_ip[target_node])
5861 msg = result.fail_msg
5863 logging.error("Instance pre-migration failed, trying to revert"
5864 " disk status: %s", msg)
5865 self.feedback_fn("Pre-migration failed, aborting")
5866 self._AbortMigration()
5867 self._RevertDiskStatus()
5868 raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5869 (instance.name, msg))
5871 self.feedback_fn("* migrating instance to %s" % target_node)
5873 result = self.rpc.call_instance_migrate(source_node, instance,
5874 self.nodes_ip[target_node],
5876 msg = result.fail_msg
5878 logging.error("Instance migration failed, trying to revert"
5879 " disk status: %s", msg)
5880 self.feedback_fn("Migration failed, aborting")
5881 self._AbortMigration()
5882 self._RevertDiskStatus()
5883 raise errors.OpExecError("Could not migrate instance %s: %s" %
5884 (instance.name, msg))
5887 instance.primary_node = target_node
5888 # distribute new instance config to the other nodes
5889 self.cfg.Update(instance, self.feedback_fn)
5891 result = self.rpc.call_finalize_migration(target_node,
5895 msg = result.fail_msg
5897 logging.error("Instance migration succeeded, but finalization failed:"
5899 raise errors.OpExecError("Could not finalize instance migration: %s" %
5902 self._EnsureSecondary(source_node)
5903 self._WaitUntilSync()
5904 self._GoStandalone()
5905 self._GoReconnect(False)
5906 self._WaitUntilSync()
5908 self.feedback_fn("* done")
5910 def Exec(self, feedback_fn):
5911 """Perform the migration.
5914 feedback_fn("Migrating instance %s" % self.instance.name)
5916 self.feedback_fn = feedback_fn
5918 self.source_node = self.instance.primary_node
5919 self.target_node = self.instance.secondary_nodes[0]
5920 self.all_nodes = [self.source_node, self.target_node]
5922 self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5923 self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5927 return self._ExecCleanup()
5929 return self._ExecMigration()
5932 def _CreateBlockDev(lu, node, instance, device, force_create,
5934 """Create a tree of block devices on a given node.
5936 If this device type has to be created on secondaries, create it and
5939 If not, just recurse to children keeping the same 'force' value.
5941 @param lu: the lu on whose behalf we execute
5942 @param node: the node on which to create the device
5943 @type instance: L{objects.Instance}
5944 @param instance: the instance which owns the device
5945 @type device: L{objects.Disk}
5946 @param device: the device to create
5947 @type force_create: boolean
5948 @param force_create: whether to force creation of this device; this
5949 will be changed to True whenever we find a device which has
5950 the CreateOnSecondary() attribute
5951 @param info: the extra 'metadata' we should attach to the device
5952 (this will be represented as a LVM tag)
5953 @type force_open: boolean
5954 @param force_open: this parameter will be passed to the
5955 L{backend.BlockdevCreate} function where it specifies
5956 whether we run on primary or not, and it affects both
5957 the child assembly and the device's own Open() execution
5960 if device.CreateOnSecondary():
5964 for child in device.children:
5965 _CreateBlockDev(lu, node, instance, child, force_create,
5968 if not force_create:
5971 _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
5974 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
5975 """Create a single block device on a given node.
5977 This will not recurse over children of the device, so they must be
5980 @param lu: the lu on whose behalf we execute
5981 @param node: the node on which to create the device
5982 @type instance: L{objects.Instance}
5983 @param instance: the instance which owns the device
5984 @type device: L{objects.Disk}
5985 @param device: the device to create
5986 @param info: the extra 'metadata' we should attach to the device
5987 (this will be represented as a LVM tag)
5988 @type force_open: boolean
5989 @param force_open: this parameter will be passed to the
5990 L{backend.BlockdevCreate} function where it specifies
5991 whether we run on primary or not, and it affects both
5992 the child assembly and the device's own Open() execution
5995 lu.cfg.SetDiskID(device, node)
5996 result = lu.rpc.call_blockdev_create(node, device, device.size,
5997 instance.name, force_open, info)
5998 result.Raise("Can't create block device %s on"
5999 " node %s for instance %s" % (device, node, instance.name))
6000 if device.physical_id is None:
6001 device.physical_id = result.payload
6004 def _GenerateUniqueNames(lu, exts):
6005 """Generate a suitable LV name.
6007 This will generate a logical volume name for the given instance.
6012 new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
6013 results.append("%s%s" % (new_id, val))
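# Illustrative sketch (hypothetical IDs, added commentary): for
# exts == [".disk0", ".disk1"] this would return names along the lines of
#   ["f4e06e0f-....disk0", "8f3a1c2b-....disk1"]
# i.e. a cluster-unique ID from GenerateUniqueID() followed by the requested
# suffix; the DRBD8 template later appends "_data"/"_meta" to such names.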
6017 def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
6019 """Generate a drbd8 device complete with its children.
6022 port = lu.cfg.AllocatePort()
6023 vgname = lu.cfg.GetVGName()
6024 shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
6025 dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
6026 logical_id=(vgname, names[0]))
6027 dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
6028 logical_id=(vgname, names[1]))
6029 drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
6030 logical_id=(primary, secondary, port,
6033 children=[dev_data, dev_meta],
6038 def _GenerateDiskTemplate(lu, template_name,
6039 instance_name, primary_node,
6040 secondary_nodes, disk_info,
6041 file_storage_dir, file_driver,
6043 """Generate the entire disk layout for a given template type.
6046 #TODO: compute space requirements
6048 vgname = lu.cfg.GetVGName()
6049 disk_count = len(disk_info)
6051 if template_name == constants.DT_DISKLESS:
6053 elif template_name == constants.DT_PLAIN:
6054 if len(secondary_nodes) != 0:
6055 raise errors.ProgrammerError("Wrong template configuration")
6057 names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6058 for i in range(disk_count)])
6059 for idx, disk in enumerate(disk_info):
6060 disk_index = idx + base_index
6061 disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
6062 logical_id=(vgname, names[idx]),
6063 iv_name="disk/%d" % disk_index,
6065 disks.append(disk_dev)
6066 elif template_name == constants.DT_DRBD8:
6067 if len(secondary_nodes) != 1:
6068 raise errors.ProgrammerError("Wrong template configuration")
6069 remote_node = secondary_nodes[0]
6070 minors = lu.cfg.AllocateDRBDMinor(
6071 [primary_node, remote_node] * len(disk_info), instance_name)
6074 for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
6075 for i in range(disk_count)]):
6076 names.append(lv_prefix + "_data")
6077 names.append(lv_prefix + "_meta")
6078 for idx, disk in enumerate(disk_info):
6079 disk_index = idx + base_index
6080 disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
6081 disk["size"], names[idx*2:idx*2+2],
6082 "disk/%d" % disk_index,
6083 minors[idx*2], minors[idx*2+1])
6084 disk_dev.mode = disk["mode"]
6085 disks.append(disk_dev)
6086 elif template_name == constants.DT_FILE:
6087 if len(secondary_nodes) != 0:
6088 raise errors.ProgrammerError("Wrong template configuration")
6090 _RequireFileStorage()
6092 for idx, disk in enumerate(disk_info):
6093 disk_index = idx + base_index
6094 disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
6095 iv_name="disk/%d" % disk_index,
6096 logical_id=(file_driver,
6097 "%s/disk%d" % (file_storage_dir,
6100 disks.append(disk_dev)
6102 raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
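# Illustrative example (assumed sizes, added commentary): for
# template_name == constants.DT_PLAIN, base_index 0 and two disks of 512 MB,
# the result would be two LD_LV Disk objects in the cluster volume group,
# with LV names generated via _GenerateUniqueNames([".disk0", ".disk1"]) and
# iv_names "disk/0" and "disk/1".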
6106 def _GetInstanceInfoText(instance):
6107 """Compute the text that should be added to the disk's metadata.
6110 return "originstname+%s" % instance.name
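# For example (hypothetical instance name): an instance called
# "web1.example.com" gets "originstname+web1.example.com" attached to its
# disks as an LVM tag, so a volume can be traced back to its owning instance.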
6113 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
6114 """Create all disks for an instance.
6116 This abstracts away some work from AddInstance.
6118 @type lu: L{LogicalUnit}
6119 @param lu: the logical unit on whose behalf we execute
6120 @type instance: L{objects.Instance}
6121 @param instance: the instance whose disks we should create
6123 @param to_skip: list of indices to skip
6124 @type target_node: string
6125 @param target_node: if passed, overrides the target node for creation
6127 @return: the success of the creation
6130 info = _GetInstanceInfoText(instance)
6131 if target_node is None:
6132 pnode = instance.primary_node
6133 all_nodes = instance.all_nodes
6138 if instance.disk_template == constants.DT_FILE:
6139 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6140 result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6142 result.Raise("Failed to create directory '%s' on"
6143 " node %s" % (file_storage_dir, pnode))
6145 # Note: this needs to be kept in sync with adding of disks in
6146 # LUSetInstanceParams
6147 for idx, device in enumerate(instance.disks):
6148 if to_skip and idx in to_skip:
6150 logging.info("Creating volume %s for instance %s",
6151 device.iv_name, instance.name)
6153 for node in all_nodes:
6154 f_create = node == pnode
6155 _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
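# Added commentary on the loop above: f_create is True only on the primary
# node, so on secondary nodes only device types reporting CreateOnSecondary()
# (and their children) are created, and force_open follows the same
# primary/secondary distinction.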
6158 def _RemoveDisks(lu, instance, target_node=None):
6159 """Remove all disks for an instance.
6161 This abstracts away some work from `AddInstance()` and
6162 `RemoveInstance()`. Note that in case some of the devices couldn't
6163 be removed, the removal will continue with the other ones (compare
6164 with `_CreateDisks()`).
6166 @type lu: L{LogicalUnit}
6167 @param lu: the logical unit on whose behalf we execute
6168 @type instance: L{objects.Instance}
6169 @param instance: the instance whose disks we should remove
6170 @type target_node: string
6171 @param target_node: used to override the node on which to remove the disks
6173 @return: the success of the removal
6176 logging.info("Removing block devices for instance %s", instance.name)
6179 for device in instance.disks:
6181 edata = [(target_node, device)]
6183 edata = device.ComputeNodeTree(instance.primary_node)
6184 for node, disk in edata:
6185 lu.cfg.SetDiskID(disk, node)
6186 msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6188 lu.LogWarning("Could not remove block device %s on node %s,"
6189 " continuing anyway: %s", device.iv_name, node, msg)
6192 if instance.disk_template == constants.DT_FILE:
6193 file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6197 tgt = instance.primary_node
6198 result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6200 lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6201 file_storage_dir, instance.primary_node, result.fail_msg)
6207 def _ComputeDiskSize(disk_template, disks):
6208 """Compute disk size requirements in the volume group
6211 # Required free disk space as a function of disk and swap space
6213 constants.DT_DISKLESS: None,
6214 constants.DT_PLAIN: sum(d["size"] for d in disks),
6215 # 128 MB are added for drbd metadata for each disk
6216 constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6217 constants.DT_FILE: None,
6220 if disk_template not in req_size_dict:
6221 raise errors.ProgrammerError("Disk template '%s' size requirement"
6222 " is unknown" % disk_template)
6224 return req_size_dict[disk_template]
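# Worked example (hypothetical sizes): for a DRBD8 instance with two disks of
# 1024 MB and 2048 MB,
#   _ComputeDiskSize(constants.DT_DRBD8, [{"size": 1024}, {"size": 2048}])
# returns (1024 + 128) + (2048 + 128) = 3328 MB, the extra 128 MB per disk
# covering the DRBD metadata volume; diskless and file-based templates return
# None since they need no space in the volume group.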
6227 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6228 """Hypervisor parameter validation.
6230 This function abstracts the hypervisor parameter validation to be
6231 used in both instance create and instance modify.
6233 @type lu: L{LogicalUnit}
6234 @param lu: the logical unit for which we check
6235 @type nodenames: list
6236 @param nodenames: the list of nodes on which we should check
6237 @type hvname: string
6238 @param hvname: the name of the hypervisor we should use
6239 @type hvparams: dict
6240 @param hvparams: the parameters which we need to check
6241 @raise errors.OpPrereqError: if the parameters are not valid
6244 hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6247 for node in nodenames:
6251 info.Raise("Hypervisor parameter validation failed on node %s" % node)
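# Typical usage (sketch, mirroring the call in LUCreateInstance.CheckPrereq):
#   _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
# i.e. the parameters are syntax-checked locally first and then validated
# remotely on every node the instance will use.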
6254 def _CheckOSParams(lu, required, nodenames, osname, osparams):
6255 """OS parameters validation.
6257 @type lu: L{LogicalUnit}
6258 @param lu: the logical unit for which we check
6259 @type required: boolean
6260 @param required: whether the validation should fail if the OS is not
6262 @type nodenames: list
6263 @param nodenames: the list of nodes on which we should check
6264 @type osname: string
6265 @param osname: the name of the OS we should use
6266 @type osparams: dict
6267 @param osparams: the parameters which we need to check
6268 @raise errors.OpPrereqError: if the parameters are not valid
6271 result = lu.rpc.call_os_validate(required, nodenames, osname,
6272 [constants.OS_VALIDATE_PARAMETERS],
6274 for node, nres in result.items():
6275 # we don't check for offline cases since this should be run only
6276 # against the master node and/or an instance's nodes
6277 nres.Raise("OS Parameters validation failed on node %s" % node)
6278 if not nres.payload:
6279 lu.LogInfo("OS %s not found on node %s, validation skipped",
6283 class LUCreateInstance(LogicalUnit):
6284 """Create an instance.
6287 HPATH = "instance-add"
6288 HTYPE = constants.HTYPE_INSTANCE
6289 _OP_REQP = ["instance_name", "disks",
6291 "wait_for_sync", "ip_check", "nics",
6292 "hvparams", "beparams", "osparams"]
6294 ("name_check", True),
6295 ("no_install", False),
6297 ("force_variant", False),
6298 ("source_handshake", None),
6299 ("source_x509_ca", None),
6300 ("source_instance_name", None),
6305 ("iallocator", None),
6306 ("hypervisor", None),
6307 ("disk_template", None),
6308 ("identify_defaults", None),
6312 def CheckArguments(self):
6316 # do not require name_check to ease forward/backward compatibility
6318 if self.op.no_install and self.op.start:
6319 self.LogInfo("No-installation mode selected, disabling startup")
6320 self.op.start = False
6321 # validate/normalize the instance name
6322 self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
6323 if self.op.ip_check and not self.op.name_check:
6324 # TODO: make the ip check more flexible and not depend on the name check
6325 raise errors.OpPrereqError("Cannot do ip checks without a name check",
6328 # check nics' parameter names
6329 for nic in self.op.nics:
6330 utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6332 # check disks. parameter names and consistent adopt/no-adopt strategy
6333 has_adopt = has_no_adopt = False
6334 for disk in self.op.disks:
6335 utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6340 if has_adopt and has_no_adopt:
6341 raise errors.OpPrereqError("Either all disks are adopted or none is",
6344 if self.op.disk_template != constants.DT_PLAIN:
6345 raise errors.OpPrereqError("Disk adoption is only supported for the"
6346 " 'plain' disk template",
6348 if self.op.iallocator is not None:
6349 raise errors.OpPrereqError("Disk adoption not allowed with an"
6350 " iallocator script", errors.ECODE_INVAL)
6351 if self.op.mode == constants.INSTANCE_IMPORT:
6352 raise errors.OpPrereqError("Disk adoption not allowed for"
6353 " instance import", errors.ECODE_INVAL)
6355 self.adopt_disks = has_adopt
6357 # verify creation mode
6358 if self.op.mode not in constants.INSTANCE_CREATE_MODES:
6359 raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
6360 self.op.mode, errors.ECODE_INVAL)
6362 # instance name verification
6363 if self.op.name_check:
6364 self.hostname1 = utils.GetHostInfo(self.op.instance_name)
6365 self.op.instance_name = self.hostname1.name
6366 # used in CheckPrereq for ip ping check
6367 self.check_ip = self.hostname1.ip
6368 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6369 raise errors.OpPrereqError("Remote imports require names to be checked" %
6372 self.check_ip = None
6374 # file storage checks
6375 if (self.op.file_driver and
6376 not self.op.file_driver in constants.FILE_DRIVER):
6377 raise errors.OpPrereqError("Invalid file driver name '%s'" %
6378 self.op.file_driver, errors.ECODE_INVAL)
6380 if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6381 raise errors.OpPrereqError("File storage directory path not absolute",
6384 ### Node/iallocator related checks
6385 if [self.op.iallocator, self.op.pnode].count(None) != 1:
6386 raise errors.OpPrereqError("One and only one of iallocator and primary"
6387 " node must be given",
6390 self._cds = _GetClusterDomainSecret()
6392 if self.op.mode == constants.INSTANCE_IMPORT:
6393 # On import force_variant must be True, because if we forced it at
6394 # initial install, our only chance when importing it back is that it
6396 self.op.force_variant = True
6398 if self.op.no_install:
6399 self.LogInfo("No-installation mode has no effect during import")
6401 elif self.op.mode == constants.INSTANCE_CREATE:
6402 if self.op.os_type is None:
6403 raise errors.OpPrereqError("No guest OS specified",
6405 if self.op.disk_template is None:
6406 raise errors.OpPrereqError("No disk template specified",
6409 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6410 # Check handshake to ensure both clusters have the same domain secret
6411 src_handshake = self.op.source_handshake
6412 if not src_handshake:
6413 raise errors.OpPrereqError("Missing source handshake",
6416 errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6419 raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6422 # Load and check source CA
6423 self.source_x509_ca_pem = self.op.source_x509_ca
6424 if not self.source_x509_ca_pem:
6425 raise errors.OpPrereqError("Missing source X509 CA",
6429 (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6431 except OpenSSL.crypto.Error, err:
6432 raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6433 (err, ), errors.ECODE_INVAL)
6435 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6436 if errcode is not None:
6437 raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6440 self.source_x509_ca = cert
6442 src_instance_name = self.op.source_instance_name
6443 if not src_instance_name:
6444 raise errors.OpPrereqError("Missing source instance name",
6447 self.source_instance_name = \
6448 utils.GetHostInfo(utils.HostInfo.NormalizeName(src_instance_name)).name
6451 raise errors.OpPrereqError("Invalid instance creation mode %r" %
6452 self.op.mode, errors.ECODE_INVAL)
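# Illustrative example (hypothetical values, added commentary): a disk
# adoption request would pass disks such as
#   [{"size": 10240, "adopt": "existing-lv-name"}]
# and, per the checks above, is only accepted with the 'plain' disk template,
# without an iallocator and not in import mode.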
6454 def ExpandNames(self):
6455 """ExpandNames for CreateInstance.
6457 Figure out the right locks for instance creation.
6460 self.needed_locks = {}
6462 instance_name = self.op.instance_name
6463 # this is just a preventive check, but someone might still add this
6464 # instance in the meantime, and creation will fail at lock-add time
6465 if instance_name in self.cfg.GetInstanceList():
6466 raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6467 instance_name, errors.ECODE_EXISTS)
6469 self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6471 if self.op.iallocator:
6472 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6474 self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6475 nodelist = [self.op.pnode]
6476 if self.op.snode is not None:
6477 self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6478 nodelist.append(self.op.snode)
6479 self.needed_locks[locking.LEVEL_NODE] = nodelist
6481 # in case of import lock the source node too
6482 if self.op.mode == constants.INSTANCE_IMPORT:
6483 src_node = self.op.src_node
6484 src_path = self.op.src_path
6486 if src_path is None:
6487 self.op.src_path = src_path = self.op.instance_name
6489 if src_node is None:
6490 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6491 self.op.src_node = None
6492 if os.path.isabs(src_path):
6493 raise errors.OpPrereqError("Importing an instance from an absolute"
6494 " path requires a source node option.",
6497 self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6498 if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6499 self.needed_locks[locking.LEVEL_NODE].append(src_node)
6500 if not os.path.isabs(src_path):
6501 self.op.src_path = src_path = \
6502 utils.PathJoin(constants.EXPORT_DIR, src_path)
6504 def _RunAllocator(self):
6505 """Run the allocator based on input opcode.
6508 nics = [n.ToDict() for n in self.nics]
6509 ial = IAllocator(self.cfg, self.rpc,
6510 mode=constants.IALLOCATOR_MODE_ALLOC,
6511 name=self.op.instance_name,
6512 disk_template=self.op.disk_template,
6515 vcpus=self.be_full[constants.BE_VCPUS],
6516 mem_size=self.be_full[constants.BE_MEMORY],
6519 hypervisor=self.op.hypervisor,
6522 ial.Run(self.op.iallocator)
6525 raise errors.OpPrereqError("Can't compute nodes using"
6526 " iallocator '%s': %s" %
6527 (self.op.iallocator, ial.info),
6529 if len(ial.result) != ial.required_nodes:
6530 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6531 " of nodes (%s), required %s" %
6532 (self.op.iallocator, len(ial.result),
6533 ial.required_nodes), errors.ECODE_FAULT)
6534 self.op.pnode = ial.result[0]
6535 self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6536 self.op.instance_name, self.op.iallocator,
6537 utils.CommaJoin(ial.result))
6538 if ial.required_nodes == 2:
6539 self.op.snode = ial.result[1]
6541 def BuildHooksEnv(self):
6544 This runs on master, primary and secondary nodes of the instance.
6548 "ADD_MODE": self.op.mode,
6550 if self.op.mode == constants.INSTANCE_IMPORT:
6551 env["SRC_NODE"] = self.op.src_node
6552 env["SRC_PATH"] = self.op.src_path
6553 env["SRC_IMAGES"] = self.src_images
6555 env.update(_BuildInstanceHookEnv(
6556 name=self.op.instance_name,
6557 primary_node=self.op.pnode,
6558 secondary_nodes=self.secondaries,
6559 status=self.op.start,
6560 os_type=self.op.os_type,
6561 memory=self.be_full[constants.BE_MEMORY],
6562 vcpus=self.be_full[constants.BE_VCPUS],
6563 nics=_NICListToTuple(self, self.nics),
6564 disk_template=self.op.disk_template,
6565 disks=[(d["size"], d["mode"]) for d in self.disks],
6568 hypervisor_name=self.op.hypervisor,
6571 nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6575 def _ReadExportInfo(self):
6576 """Reads the export information from disk.
6578 It will override the opcode source node and path with the actual
6579 information, if these two were not specified before.
6581 @return: the export information
6584 assert self.op.mode == constants.INSTANCE_IMPORT
6586 src_node = self.op.src_node
6587 src_path = self.op.src_path
6589 if src_node is None:
6590 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6591 exp_list = self.rpc.call_export_list(locked_nodes)
6593 for node in exp_list:
6594 if exp_list[node].fail_msg:
6596 if src_path in exp_list[node].payload:
6598 self.op.src_node = src_node = node
6599 self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6603 raise errors.OpPrereqError("No export found for relative path %s" %
6604 src_path, errors.ECODE_INVAL)
6606 _CheckNodeOnline(self, src_node)
6607 result = self.rpc.call_export_info(src_node, src_path)
6608 result.Raise("No export or invalid export found in dir %s" % src_path)
6610 export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6611 if not export_info.has_section(constants.INISECT_EXP):
6612 raise errors.ProgrammerError("Corrupted export config",
6613 errors.ECODE_ENVIRON)
6615 ei_version = export_info.get(constants.INISECT_EXP, "version")
6616 if (int(ei_version) != constants.EXPORT_VERSION):
6617 raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6618 (ei_version, constants.EXPORT_VERSION),
6619 errors.ECODE_ENVIRON)
6622 def _ReadExportParams(self, einfo):
6623 """Use export parameters as defaults.
6625 If the opcode doesn't specify (i.e. override) some instance
6626 parameters, try to take them from the export information, if
6630 self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6632 if self.op.disk_template is None:
6633 if einfo.has_option(constants.INISECT_INS, "disk_template"):
6634 self.op.disk_template = einfo.get(constants.INISECT_INS,
6637 raise errors.OpPrereqError("No disk template specified and the export"
6638 " is missing the disk_template information",
6641 if not self.op.disks:
6642 if einfo.has_option(constants.INISECT_INS, "disk_count"):
6644 # TODO: import the disk iv_name too
6645 for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6646 disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6647 disks.append({"size": disk_sz})
6648 self.op.disks = disks
6650 raise errors.OpPrereqError("No disk info specified and the export"
6651 " is missing the disk information",
6654 if (not self.op.nics and
6655 einfo.has_option(constants.INISECT_INS, "nic_count")):
6657 for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6659 for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6660 v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6665 if (self.op.hypervisor is None and
6666 einfo.has_option(constants.INISECT_INS, "hypervisor")):
6667 self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6668 if einfo.has_section(constants.INISECT_HYP):
6669 # use the export parameters but do not override the ones
6670 # specified by the user
6671 for name, value in einfo.items(constants.INISECT_HYP):
6672 if name not in self.op.hvparams:
6673 self.op.hvparams[name] = value
6675 if einfo.has_section(constants.INISECT_BEP):
6676 # use the parameters, without overriding
6677 for name, value in einfo.items(constants.INISECT_BEP):
6678 if name not in self.op.beparams:
6679 self.op.beparams[name] = value
6681 # try to read the parameters old style, from the main section
6682 for name in constants.BES_PARAMETERS:
6683 if (name not in self.op.beparams and
6684 einfo.has_option(constants.INISECT_INS, name)):
6685 self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6687 if einfo.has_section(constants.INISECT_OSP):
6688 # use the parameters, without overriding
6689 for name, value in einfo.items(constants.INISECT_OSP):
6690 if name not in self.op.osparams:
6691 self.op.osparams[name] = value
6693 def _RevertToDefaults(self, cluster):
6694 """Revert the instance parameters to the default values.
6698 hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
6699 for name in self.op.hvparams.keys():
6700 if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6701 del self.op.hvparams[name]
6703 be_defs = cluster.SimpleFillBE({})
6704 for name in self.op.beparams.keys():
6705 if name in be_defs and be_defs[name] == self.op.beparams[name]:
6706 del self.op.beparams[name]
6708 nic_defs = cluster.SimpleFillNIC({})
6709 for nic in self.op.nics:
6710 for name in constants.NICS_PARAMETERS:
6711 if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
6714 os_defs = cluster.SimpleFillOS(self.op.os_type, {})
6715 for name in self.op.osparams.keys():
6716 if name in os_defs and os_defs[name] == self.op.osparams[name]:
6717 del self.op.osparams[name]
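# Illustrative example (assumed parameter, added commentary): if the opcode
# specifies a backend parameter, say memory=512, and the cluster default is
# also 512, the entry is dropped from self.op.beparams above so the new
# instance keeps tracking the cluster default instead of pinning the value.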
6719 def CheckPrereq(self):
6720 """Check prerequisites.
6723 if self.op.mode == constants.INSTANCE_IMPORT:
6724 export_info = self._ReadExportInfo()
6725 self._ReadExportParams(export_info)
6727 _CheckDiskTemplate(self.op.disk_template)
6729 if (not self.cfg.GetVGName() and
6730 self.op.disk_template not in constants.DTS_NOT_LVM):
6731 raise errors.OpPrereqError("Cluster does not support lvm-based"
6732 " instances", errors.ECODE_STATE)
6734 if self.op.hypervisor is None:
6735 self.op.hypervisor = self.cfg.GetHypervisorType()
6737 cluster = self.cfg.GetClusterInfo()
6738 enabled_hvs = cluster.enabled_hypervisors
6739 if self.op.hypervisor not in enabled_hvs:
6740 raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6741 " cluster (%s)" % (self.op.hypervisor,
6742 ",".join(enabled_hvs)),
6745 # check hypervisor parameter syntax (locally)
6746 utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6747 filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
6749 hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6750 hv_type.CheckParameterSyntax(filled_hvp)
6751 self.hv_full = filled_hvp
6752 # check that we don't specify global parameters on an instance
6753 _CheckGlobalHvParams(self.op.hvparams)
6755 # fill and remember the beparams dict
6756 utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6757 self.be_full = cluster.SimpleFillBE(self.op.beparams)
6759 # build os parameters
6760 self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
6762 # now that hvp/bep are in final format, let's reset to defaults,
6764 if self.op.identify_defaults:
6765 self._RevertToDefaults(cluster)
6769 for idx, nic in enumerate(self.op.nics):
6770 nic_mode_req = nic.get("mode", None)
6771 nic_mode = nic_mode_req
6772 if nic_mode is None:
6773 nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6775 # in routed mode, for the first nic, the default ip is 'auto'
6776 if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6777 default_ip_mode = constants.VALUE_AUTO
6779 default_ip_mode = constants.VALUE_NONE
6781 # ip validity checks
6782 ip = nic.get("ip", default_ip_mode)
6783 if ip is None or ip.lower() == constants.VALUE_NONE:
6785 elif ip.lower() == constants.VALUE_AUTO:
6786 if not self.op.name_check:
6787 raise errors.OpPrereqError("IP address set to auto but name checks"
6788 " have been skipped. Aborting.",
6790 nic_ip = self.hostname1.ip
6792 if not utils.IsValidIP(ip):
6793 raise errors.OpPrereqError("Given IP address '%s' doesn't look"
6794 " like a valid IP" % ip,
6798 # TODO: check the ip address for uniqueness
6799 if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6800 raise errors.OpPrereqError("Routed nic mode requires an ip address",
6803 # MAC address verification
6804 mac = nic.get("mac", constants.VALUE_AUTO)
6805 if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6806 mac = utils.NormalizeAndValidateMac(mac)
6809 self.cfg.ReserveMAC(mac, self.proc.GetECId())
6810 except errors.ReservationError:
6811 raise errors.OpPrereqError("MAC address %s already in use"
6812 " in cluster" % mac,
6813 errors.ECODE_NOTUNIQUE)
6815 # bridge verification
6816 bridge = nic.get("bridge", None)
6817 link = nic.get("link", None)
6819 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6820 " at the same time", errors.ECODE_INVAL)
6821 elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
6822 raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
6829 nicparams[constants.NIC_MODE] = nic_mode_req
6831 nicparams[constants.NIC_LINK] = link
6833 check_params = cluster.SimpleFillNIC(nicparams)
6834 objects.NIC.CheckParameterSyntax(check_params)
6835 self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
6837 # disk checks/pre-build
6839 for disk in self.op.disks:
6840 mode = disk.get("mode", constants.DISK_RDWR)
6841 if mode not in constants.DISK_ACCESS_SET:
6842 raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6843 mode, errors.ECODE_INVAL)
6844 size = disk.get("size", None)
6846 raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6849 except (TypeError, ValueError):
6850 raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6852 new_disk = {"size": size, "mode": mode}
6854 new_disk["adopt"] = disk["adopt"]
6855 self.disks.append(new_disk)
6857 if self.op.mode == constants.INSTANCE_IMPORT:
6859 # Check that the new instance doesn't have fewer disks than the export
6860 instance_disks = len(self.disks)
6861 export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6862 if instance_disks < export_disks:
6863 raise errors.OpPrereqError("Not enough disks to import."
6864 " (instance: %d, export: %d)" %
6865 (instance_disks, export_disks),
6869 for idx in range(export_disks):
6870 option = 'disk%d_dump' % idx
6871 if export_info.has_option(constants.INISECT_INS, option):
6872 # FIXME: are the old os-es, disk sizes, etc. useful?
6873 export_name = export_info.get(constants.INISECT_INS, option)
6874 image = utils.PathJoin(self.op.src_path, export_name)
6875 disk_images.append(image)
6877 disk_images.append(False)
6879 self.src_images = disk_images
6881 old_name = export_info.get(constants.INISECT_INS, 'name')
6883 exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
6884 except (TypeError, ValueError), err:
6885 raise errors.OpPrereqError("Invalid export file, nic_count is not"
6886 " an integer: %s" % str(err),
6888 if self.op.instance_name == old_name:
6889 for idx, nic in enumerate(self.nics):
6890 if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6891 nic_mac_ini = 'nic%d_mac' % idx
6892 nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6894 # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6896 # ip ping checks (we use the same ip that was resolved in ExpandNames)
6897 if self.op.ip_check:
6898 if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6899 raise errors.OpPrereqError("IP %s of instance %s already in use" %
6900 (self.check_ip, self.op.instance_name),
6901 errors.ECODE_NOTUNIQUE)
6903 #### mac address generation
6904 # By generating the mac address here, both the allocator and the hooks get
6905 # the real, final mac address rather than the 'auto' or 'generate' value.
6906 # There is a race condition between the generation and the instance object
6907 # creation, which means that we know the mac is valid now, but we're not
6908 # sure it will be when we actually add the instance. If things go bad
6909 # adding the instance will abort because of a duplicate mac, and the
6910 # creation job will fail.
6911 for nic in self.nics:
6912 if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6913 nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6917 if self.op.iallocator is not None:
6918 self._RunAllocator()
6920 #### node related checks
6922 # check primary node
6923 self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6924 assert self.pnode is not None, \
6925 "Cannot retrieve locked node %s" % self.op.pnode
6927 raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6928 pnode.name, errors.ECODE_STATE)
6930 raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6931 pnode.name, errors.ECODE_STATE)
6933 self.secondaries = []
6935 # mirror node verification
6936 if self.op.disk_template in constants.DTS_NET_MIRROR:
6937 if self.op.snode is None:
6938 raise errors.OpPrereqError("The networked disk templates need"
6939 " a mirror node", errors.ECODE_INVAL)
6940 if self.op.snode == pnode.name:
6941 raise errors.OpPrereqError("The secondary node cannot be the"
6942 " primary node.", errors.ECODE_INVAL)
6943 _CheckNodeOnline(self, self.op.snode)
6944 _CheckNodeNotDrained(self, self.op.snode)
6945 self.secondaries.append(self.op.snode)
6947 nodenames = [pnode.name] + self.secondaries
6949 req_size = _ComputeDiskSize(self.op.disk_template,
6952 # Check lv size requirements, if not adopting
6953 if req_size is not None and not self.adopt_disks:
6954 _CheckNodesFreeDisk(self, nodenames, req_size)
6956 if self.adopt_disks: # instead, we must check the adoption data
6957 all_lvs = set([i["adopt"] for i in self.disks])
6958 if len(all_lvs) != len(self.disks):
6959 raise errors.OpPrereqError("Duplicate volume names given for adoption",
6961 for lv_name in all_lvs:
6963 self.cfg.ReserveLV(lv_name, self.proc.GetECId())
6964 except errors.ReservationError:
6965 raise errors.OpPrereqError("LV named %s used by another instance" %
6966 lv_name, errors.ECODE_NOTUNIQUE)
6968 node_lvs = self.rpc.call_lv_list([pnode.name],
6969 self.cfg.GetVGName())[pnode.name]
6970 node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
6971 node_lvs = node_lvs.payload
6972 delta = all_lvs.difference(node_lvs.keys())
6974 raise errors.OpPrereqError("Missing logical volume(s): %s" %
6975 utils.CommaJoin(delta),
6977 online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
6979 raise errors.OpPrereqError("Online logical volumes found, cannot"
6980 " adopt: %s" % utils.CommaJoin(online_lvs),
6982 # update the disk sizes based on what was found
6983 for dsk in self.disks:
6984 dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
6986 _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
6988 _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
6989 # check OS parameters (remotely)
6990 _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
6992 _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
6994 # memory check on primary node
6996 _CheckNodeFreeMemory(self, self.pnode.name,
6997 "creating instance %s" % self.op.instance_name,
6998 self.be_full[constants.BE_MEMORY],
7001 self.dry_run_result = list(nodenames)
7003 def Exec(self, feedback_fn):
7004 """Create and add the instance to the cluster.
7007 instance = self.op.instance_name
7008 pnode_name = self.pnode.name
7010 ht_kind = self.op.hypervisor
7011 if ht_kind in constants.HTS_REQ_PORT:
7012 network_port = self.cfg.AllocatePort()
7016 if constants.ENABLE_FILE_STORAGE:
7017 # this is needed because os.path.join does not accept None arguments
7018 if self.op.file_storage_dir is None:
7019 string_file_storage_dir = ""
7021 string_file_storage_dir = self.op.file_storage_dir
7023 # build the full file storage dir path
7024 file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
7025 string_file_storage_dir, instance)
7027 file_storage_dir = ""
7029 disks = _GenerateDiskTemplate(self,
7030 self.op.disk_template,
7031 instance, pnode_name,
7035 self.op.file_driver,
7038 iobj = objects.Instance(name=instance, os=self.op.os_type,
7039 primary_node=pnode_name,
7040 nics=self.nics, disks=disks,
7041 disk_template=self.op.disk_template,
7043 network_port=network_port,
7044 beparams=self.op.beparams,
7045 hvparams=self.op.hvparams,
7046 hypervisor=self.op.hypervisor,
7047 osparams=self.op.osparams,
7050 if self.adopt_disks:
7051 # rename LVs to the newly-generated names; we need to construct
7052 # 'fake' LV disks with the old data, plus the new unique_id
7053 tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
7055 for t_dsk, a_dsk in zip(tmp_disks, self.disks):
7056 rename_to.append(t_dsk.logical_id)
7057 t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
7058 self.cfg.SetDiskID(t_dsk, pnode_name)
7059 result = self.rpc.call_blockdev_rename(pnode_name,
7060 zip(tmp_disks, rename_to))
7061 result.Raise("Failed to rename adopted LVs")
7063 feedback_fn("* creating instance disks...")
7065 _CreateDisks(self, iobj)
7066 except errors.OpExecError:
7067 self.LogWarning("Device creation failed, reverting...")
7069 _RemoveDisks(self, iobj)
7071 self.cfg.ReleaseDRBDMinors(instance)
7074 feedback_fn("adding instance %s to cluster config" % instance)
7076 self.cfg.AddInstance(iobj, self.proc.GetECId())
7078 # Declare that we don't want to remove the instance lock anymore, as we've
7079 # added the instance to the config
7080 del self.remove_locks[locking.LEVEL_INSTANCE]
7081 # Unlock all the nodes
7082 if self.op.mode == constants.INSTANCE_IMPORT:
7083 nodes_keep = [self.op.src_node]
7084 nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
7085 if node != self.op.src_node]
7086 self.context.glm.release(locking.LEVEL_NODE, nodes_release)
7087 self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
7089 self.context.glm.release(locking.LEVEL_NODE)
7090 del self.acquired_locks[locking.LEVEL_NODE]
7092 if self.op.wait_for_sync:
7093 disk_abort = not _WaitForSync(self, iobj)
7094 elif iobj.disk_template in constants.DTS_NET_MIRROR:
7095 # make sure the disks are not degraded (still sync-ing is ok)
7097 feedback_fn("* checking mirrors status")
7098 disk_abort = not _WaitForSync(self, iobj, oneshot=True)
7103 _RemoveDisks(self, iobj)
7104 self.cfg.RemoveInstance(iobj.name)
7105 # Make sure the instance lock gets removed
7106 self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
7107 raise errors.OpExecError("There are some degraded disks for"
7110 if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
7111 if self.op.mode == constants.INSTANCE_CREATE:
7112 if not self.op.no_install:
7113 feedback_fn("* running the instance OS create scripts...")
7114 # FIXME: pass debug option from opcode to backend
7115 result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
7116 self.op.debug_level)
7117 result.Raise("Could not add os for instance %s"
7118 " on node %s" % (instance, pnode_name))
7120 elif self.op.mode == constants.INSTANCE_IMPORT:
7121 feedback_fn("* running the instance OS import scripts...")
7125 for idx, image in enumerate(self.src_images):
7129 # FIXME: pass debug option from opcode to backend
7130 dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7131 constants.IEIO_FILE, (image, ),
7132 constants.IEIO_SCRIPT,
7133 (iobj.disks[idx], idx),
7135 transfers.append(dt)
7138 masterd.instance.TransferInstanceData(self, feedback_fn,
7139 self.op.src_node, pnode_name,
7140 self.pnode.secondary_ip,
7142 if not compat.all(import_result):
7143 self.LogWarning("Some disks for instance %s on node %s were not"
7144 " imported successfully" % (instance, pnode_name))
7146 elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7147 feedback_fn("* preparing remote import...")
7148 connect_timeout = constants.RIE_CONNECT_TIMEOUT
7149 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7151 disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
7152 self.source_x509_ca,
7153 self._cds, timeouts)
7154 if not compat.all(disk_results):
7155 # TODO: Should the instance still be started, even if some disks
7156 # failed to import (valid for local imports, too)?
7157 self.LogWarning("Some disks for instance %s on node %s were not"
7158 " imported successfully" % (instance, pnode_name))
7160 # Run rename script on newly imported instance
7161 assert iobj.name == instance
7162 feedback_fn("Running rename script for %s" % instance)
7163 result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7164 self.source_instance_name,
7165 self.op.debug_level)
7167 self.LogWarning("Failed to run rename script for %s on node"
7168 " %s: %s" % (instance, pnode_name, result.fail_msg))
7171 # also checked in the prereq part
7172 raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7176 iobj.admin_up = True
7177 self.cfg.Update(iobj, feedback_fn)
7178 logging.info("Starting instance %s on node %s", instance, pnode_name)
7179 feedback_fn("* starting instance...")
7180 result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7181 result.Raise("Could not start instance")
7183 return list(iobj.all_nodes)
7186 class LUConnectConsole(NoHooksLU):
7187 """Connect to an instance's console.
7189 This is somewhat special in that it returns the command line that
7190 you need to run on the master node in order to connect to the
7194 _OP_REQP = ["instance_name"]
7197 def ExpandNames(self):
7198 self._ExpandAndLockInstance()
7200 def CheckPrereq(self):
7201 """Check prerequisites.
7203 This checks that the instance is in the cluster.
7206 self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7207 assert self.instance is not None, \
7208 "Cannot retrieve locked instance %s" % self.op.instance_name
7209 _CheckNodeOnline(self, self.instance.primary_node)
7211 def Exec(self, feedback_fn):
7212 """Connect to the console of an instance
7215 instance = self.instance
7216 node = instance.primary_node
7218 node_insts = self.rpc.call_instance_list([node],
7219 [instance.hypervisor])[node]
7220 node_insts.Raise("Can't get node information from %s" % node)
7222 if instance.name not in node_insts.payload:
7223 raise errors.OpExecError("Instance %s is not running." % instance.name)
7225 logging.debug("Connecting to console of %s on %s", instance.name, node)
7227 hyper = hypervisor.GetHypervisor(instance.hypervisor)
7228 cluster = self.cfg.GetClusterInfo()
7229 # beparams and hvparams are passed separately, to avoid editing the
7230 # instance and then saving the defaults in the instance itself.
7231 hvparams = cluster.FillHV(instance)
7232 beparams = cluster.FillBE(instance)
7233 console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
7236 return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
7239 class LUReplaceDisks(LogicalUnit):
7240 """Replace the disks of an instance.
7243 HPATH = "mirrors-replace"
7244 HTYPE = constants.HTYPE_INSTANCE
7245 _OP_REQP = ["instance_name", "mode", "disks"]
7247 ("remote_node", None),
7248 ("iallocator", None),
7249 ("early_release", None),
7253 def CheckArguments(self):
7254 TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7257 def ExpandNames(self):
7258 self._ExpandAndLockInstance()
7260 if self.op.iallocator is not None:
7261 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7263 elif self.op.remote_node is not None:
7264 remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7265 self.op.remote_node = remote_node
7267 # Warning: do not remove the locking of the new secondary here
7268 # unless DRBD8.AddChildren is changed to work in parallel;
7269 # currently it doesn't since parallel invocations of
7270 # FindUnusedMinor will conflict
7271 self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7272 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7275 self.needed_locks[locking.LEVEL_NODE] = []
7276 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7278 self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7279 self.op.iallocator, self.op.remote_node,
7280 self.op.disks, False, self.op.early_release)
7282 self.tasklets = [self.replacer]
7284 def DeclareLocks(self, level):
7285 # If we're not already locking all nodes in the set we have to declare the
7286 # instance's primary/secondary nodes.
7287 if (level == locking.LEVEL_NODE and
7288 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7289 self._LockInstancesNodes()
7291 def BuildHooksEnv(self):
7294 This runs on the master, the primary and all the secondaries.
7297 instance = self.replacer.instance
7299 "MODE": self.op.mode,
7300 "NEW_SECONDARY": self.op.remote_node,
7301 "OLD_SECONDARY": instance.secondary_nodes[0],
7303 env.update(_BuildInstanceHookEnvByObject(self, instance))
7305 self.cfg.GetMasterNode(),
7306 instance.primary_node,
7308 if self.op.remote_node is not None:
7309 nl.append(self.op.remote_node)
7313 class LUEvacuateNode(LogicalUnit):
7314 """Relocate the secondary instances from a node.
7317 HPATH = "node-evacuate"
7318 HTYPE = constants.HTYPE_NODE
7319 _OP_REQP = ["node_name"]
7321 ("remote_node", None),
7322 ("iallocator", None),
7323 ("early_release", False),
7327 def CheckArguments(self):
7328 TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
7329 self.op.remote_node,
7332 def ExpandNames(self):
7333 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7335 self.needed_locks = {}
7337 # Declare node locks
7338 if self.op.iallocator is not None:
7339 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7341 elif self.op.remote_node is not None:
7342 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7344 # Warning: do not remove the locking of the new secondary here
7345 # unless DRBD8.AddChildren is changed to work in parallel;
7346 # currently it doesn't since parallel invocations of
7347 # FindUnusedMinor will conflict
7348 self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
7349 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7352 raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)
7354 # Create tasklets for replacing disks for all secondary instances on this
7359 for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
7360 logging.debug("Replacing disks for instance %s", inst.name)
7361 names.append(inst.name)
7363 replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
7364 self.op.iallocator, self.op.remote_node, [],
7365 True, self.op.early_release)
7366 tasklets.append(replacer)
7368 self.tasklets = tasklets
7369 self.instance_names = names
7371 # Declare instance locks
7372 self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names
7374 def DeclareLocks(self, level):
7375 # If we're not already locking all nodes in the set we have to declare the
7376 # instance's primary/secondary nodes.
7377 if (level == locking.LEVEL_NODE and
7378 self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7379 self._LockInstancesNodes()
7381 def BuildHooksEnv(self):
7384 This runs on the master, the primary and all the secondaries.
7388 "NODE_NAME": self.op.node_name,
7391 nl = [self.cfg.GetMasterNode()]
7393 if self.op.remote_node is not None:
7394 env["NEW_SECONDARY"] = self.op.remote_node
7395 nl.append(self.op.remote_node)
7397 return (env, nl, nl)
7400 class TLReplaceDisks(Tasklet):
7401 """Replaces disks for an instance.
7403 Note: Locking is not within the scope of this class.
7406 def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7407 disks, delay_iallocator, early_release):
7408 """Initializes this class.
7411 Tasklet.__init__(self, lu)
7414 self.instance_name = instance_name
7416 self.iallocator_name = iallocator_name
7417 self.remote_node = remote_node
7419 self.delay_iallocator = delay_iallocator
7420 self.early_release = early_release
7423 self.instance = None
7424 self.new_node = None
7425 self.target_node = None
7426 self.other_node = None
7427 self.remote_node_info = None
7428 self.node_secondary_ip = None
7431 def CheckArguments(mode, remote_node, iallocator):
7432 """Helper function for users of this class.
7435 # check for valid parameter combination
7436 if mode == constants.REPLACE_DISK_CHG:
7437 if remote_node is None and iallocator is None:
7438 raise errors.OpPrereqError("When changing the secondary either an"
7439 " iallocator script must be used or the"
7440 " new node given", errors.ECODE_INVAL)
7442 if remote_node is not None and iallocator is not None:
7443 raise errors.OpPrereqError("Give either the iallocator or the new"
7444 " secondary, not both", errors.ECODE_INVAL)
7446 elif remote_node is not None or iallocator is not None:
7447 # Not replacing the secondary
7448 raise errors.OpPrereqError("The iallocator and new node options can"
7449 " only be used when changing the"
7450 " secondary node", errors.ECODE_INVAL)
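# Illustrative usage (hypothetical allocator name, added commentary):
#   TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG, None, "hail")
# is valid (secondary change via an allocator), while giving both a remote
# node and an allocator, or giving either of them for a primary/secondary
# disk-only replacement, raises OpPrereqError.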
7453 def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7454 """Compute a new secondary node using an IAllocator.
7457 ial = IAllocator(lu.cfg, lu.rpc,
7458 mode=constants.IALLOCATOR_MODE_RELOC,
7460 relocate_from=relocate_from)
7462 ial.Run(iallocator_name)
7465 raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7466 " %s" % (iallocator_name, ial.info),
7469 if len(ial.result) != ial.required_nodes:
7470 raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7471 " of nodes (%s), required %s" %
7473 len(ial.result), ial.required_nodes),
7476 remote_node_name = ial.result[0]
7478 lu.LogInfo("Selected new secondary for instance '%s': %s",
7479 instance_name, remote_node_name)
7481 return remote_node_name
7483 def _FindFaultyDisks(self, node_name):
7484 return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7487 def CheckPrereq(self):
7488 """Check prerequisites.
7490 This checks that the instance is in the cluster.
7493 self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7494 assert instance is not None, \
7495 "Cannot retrieve locked instance %s" % self.instance_name
7497 if instance.disk_template != constants.DT_DRBD8:
7498 raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7499 " instances", errors.ECODE_INVAL)
7501 if len(instance.secondary_nodes) != 1:
7502 raise errors.OpPrereqError("The instance has a strange layout,"
7503 " expected one secondary but found %d" %
7504 len(instance.secondary_nodes),
7507 if not self.delay_iallocator:
7508 self._CheckPrereq2()
7510 def _CheckPrereq2(self):
7511 """Check prerequisites, second part.
7513 This function should always be part of CheckPrereq. It was separated and is
7514 now called from Exec because during node evacuation the iallocator was only
7515 called with an unmodified cluster model, not taking planned changes into
7519 instance = self.instance
7520 secondary_node = instance.secondary_nodes[0]
7522 if self.iallocator_name is None:
7523 remote_node = self.remote_node
7525 remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7526 instance.name, instance.secondary_nodes)
7528 if remote_node is not None:
7529 self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7530 assert self.remote_node_info is not None, \
7531 "Cannot retrieve locked node %s" % remote_node
7533 self.remote_node_info = None
7535 if remote_node == self.instance.primary_node:
7536 raise errors.OpPrereqError("The specified node is the primary node of"
7537 " the instance.", errors.ECODE_INVAL)
7539 if remote_node == secondary_node:
7540 raise errors.OpPrereqError("The specified node is already the"
7541 " secondary node of the instance.",
7544 if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7545 constants.REPLACE_DISK_CHG):
7546 raise errors.OpPrereqError("Cannot specify disks to be replaced",
7549 if self.mode == constants.REPLACE_DISK_AUTO:
7550 faulty_primary = self._FindFaultyDisks(instance.primary_node)
7551 faulty_secondary = self._FindFaultyDisks(secondary_node)
7553 if faulty_primary and faulty_secondary:
7554 raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7555 " one node and cannot be repaired"
7556 " automatically" % self.instance_name,
7560 self.disks = faulty_primary
7561 self.target_node = instance.primary_node
7562 self.other_node = secondary_node
7563 check_nodes = [self.target_node, self.other_node]
7564 elif faulty_secondary:
7565 self.disks = faulty_secondary
7566 self.target_node = secondary_node
7567 self.other_node = instance.primary_node
7568 check_nodes = [self.target_node, self.other_node]
7574 # Non-automatic modes
7575 if self.mode == constants.REPLACE_DISK_PRI:
7576 self.target_node = instance.primary_node
7577 self.other_node = secondary_node
7578 check_nodes = [self.target_node, self.other_node]
7580 elif self.mode == constants.REPLACE_DISK_SEC:
7581 self.target_node = secondary_node
7582 self.other_node = instance.primary_node
7583 check_nodes = [self.target_node, self.other_node]
7585 elif self.mode == constants.REPLACE_DISK_CHG:
7586 self.new_node = remote_node
7587 self.other_node = instance.primary_node
7588 self.target_node = secondary_node
7589 check_nodes = [self.new_node, self.other_node]
7591 _CheckNodeNotDrained(self.lu, remote_node)
7593 old_node_info = self.cfg.GetNodeInfo(secondary_node)
7594 assert old_node_info is not None
7595 if old_node_info.offline and not self.early_release:
7596 # doesn't make sense to delay the release
7597 self.early_release = True
7598 self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7599 " early-release mode", secondary_node)
7602 raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7605 # If not specified, all disks should be replaced
7606 if not self.disks:
7607 self.disks = range(len(self.instance.disks))
7609 for node in check_nodes:
7610 _CheckNodeOnline(self.lu, node)
7612 # Check whether disks are valid
7613 for disk_idx in self.disks:
7614 instance.FindDisk(disk_idx)
7616 # Get secondary node IP addresses
7617 node_2nd_ip = {}
7619 for node_name in [self.target_node, self.other_node, self.new_node]:
7620 if node_name is not None:
7621 node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7623 self.node_secondary_ip = node_2nd_ip
7625 def Exec(self, feedback_fn):
7626 """Execute disk replacement.
7628 This dispatches the disk replacement to the appropriate handler.
7631 if self.delay_iallocator:
7632 self._CheckPrereq2()
7635 feedback_fn("No disks need replacement")
7638 feedback_fn("Replacing disk(s) %s for %s" %
7639 (utils.CommaJoin(self.disks), self.instance.name))
7641 activate_disks = (not self.instance.admin_up)
7643 # Activate the instance disks if we're replacing them on a down instance
7645 _StartInstanceDisks(self.lu, self.instance, True)
7648 # Should we replace the secondary node?
7649 if self.new_node is not None:
7650 fn = self._ExecDrbd8Secondary
7652 fn = self._ExecDrbd8DiskOnly
7654 return fn(feedback_fn)
7657 # Deactivate the instance disks if we're replacing them on a
7658 # down instance
7660 _SafeShutdownInstanceDisks(self.lu, self.instance)
7662 def _CheckVolumeGroup(self, nodes):
7663 self.lu.LogInfo("Checking volume groups")
7665 vgname = self.cfg.GetVGName()
7667 # Make sure volume group exists on all involved nodes
7668 results = self.rpc.call_vg_list(nodes)
7670 raise errors.OpExecError("Can't list volume groups on the nodes")
7674 res.Raise("Error checking node %s" % node)
7675 if vgname not in res.payload:
7676 raise errors.OpExecError("Volume group '%s' not found on node %s" %
7679 def _CheckDisksExistence(self, nodes):
7680 # Check disk existence
7681 for idx, dev in enumerate(self.instance.disks):
7682 if idx not in self.disks:
7686 self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7687 self.cfg.SetDiskID(dev, node)
7689 result = self.rpc.call_blockdev_find(node, dev)
7691 msg = result.fail_msg
7692 if msg or not result.payload:
7694 msg = "disk not found"
7695 raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7698 def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7699 for idx, dev in enumerate(self.instance.disks):
7700 if idx not in self.disks:
7703 self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7706 if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7708 raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7709 " replace disks for instance %s" %
7710 (node_name, self.instance.name))
7712 def _CreateNewStorage(self, node_name):
7713 vgname = self.cfg.GetVGName()
7714 iv_names = {}
7716 for idx, dev in enumerate(self.instance.disks):
7717 if idx not in self.disks:
7720 self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7722 self.cfg.SetDiskID(dev, node_name)
7724 lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7725 names = _GenerateUniqueNames(self.lu, lv_names)
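# Note: _GenerateUniqueNames turns these suffixes into cluster-unique LV names
# (typically by prefixing a generated unique ID), so the new volumes end up
# named along the lines of "<uuid>.disk0_data" / "<uuid>.disk0_meta"
# (illustrative names only).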
7727 lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7728 logical_id=(vgname, names[0]))
7729 lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7730 logical_id=(vgname, names[1]))
7732 new_lvs = [lv_data, lv_meta]
7733 old_lvs = dev.children
7734 iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7736 # we pass force_create=True to force the LVM creation
7737 for new_lv in new_lvs:
7738 _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7739 _GetInstanceInfoText(self.instance), False)
7741 return iv_names
7743 def _CheckDevices(self, node_name, iv_names):
7744 for name, (dev, _, _) in iv_names.iteritems():
7745 self.cfg.SetDiskID(dev, node_name)
7747 result = self.rpc.call_blockdev_find(node_name, dev)
7749 msg = result.fail_msg
7750 if msg or not result.payload:
7752 msg = "disk not found"
7753 raise errors.OpExecError("Can't find DRBD device %s: %s" %
7756 if result.payload.is_degraded:
7757 raise errors.OpExecError("DRBD device %s is degraded!" % name)
7759 def _RemoveOldStorage(self, node_name, iv_names):
7760 for name, (_, old_lvs, _) in iv_names.iteritems():
7761 self.lu.LogInfo("Remove logical volumes for %s" % name)
7764 self.cfg.SetDiskID(lv, node_name)
7766 msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7768 self.lu.LogWarning("Can't remove old LV: %s" % msg,
7769 hint="remove unused LVs manually")
7771 def _ReleaseNodeLock(self, node_name):
7772 """Releases the lock for a given node."""
7773 self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7775 def _ExecDrbd8DiskOnly(self, feedback_fn):
7776 """Replace a disk on the primary or secondary for DRBD 8.
7778 The algorithm for replace is quite complicated:
7780 1. for each disk to be replaced:
7782 1. create new LVs on the target node with unique names
7783 1. detach old LVs from the drbd device
7784 1. rename old LVs to name_replaced.<time_t>
7785 1. rename new LVs to old LVs
7786 1. attach the new LVs (with the old names now) to the drbd device
7788 1. wait for sync across all devices
7790 1. for each modified disk:
7792 1. remove old LVs (which have the name name_replaced.<time_t>)
7794 Failures are not very well handled.
7797 steps_total = 6
7799 # Step: check device activation
7800 self.lu.LogStep(1, steps_total, "Check device existence")
7801 self._CheckDisksExistence([self.other_node, self.target_node])
7802 self._CheckVolumeGroup([self.target_node, self.other_node])
7804 # Step: check other node consistency
7805 self.lu.LogStep(2, steps_total, "Check peer consistency")
7806 self._CheckDisksConsistency(self.other_node,
7807 self.other_node == self.instance.primary_node,
7810 # Step: create new storage
7811 self.lu.LogStep(3, steps_total, "Allocate new storage")
7812 iv_names = self._CreateNewStorage(self.target_node)
7814 # Step: for each lv, detach+rename*2+attach
7815 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7816 for dev, old_lvs, new_lvs in iv_names.itervalues():
7817 self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7819 result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7821 result.Raise("Can't detach drbd from local storage on node"
7822 " %s for device %s" % (self.target_node, dev.iv_name))
7824 #cfg.Update(instance)
7826 # ok, we created the new LVs, so now we know we have the needed
7827 # storage; as such, we proceed on the target node to rename
7828 # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7829 # using the assumption that logical_id == physical_id (which in
7830 # turn is the unique_id on that node)
7832 # FIXME(iustin): use a better name for the replaced LVs
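# Illustration of the rename dance (hypothetical names): an old data LV
# "xenvg/<uuid>.disk0_data" is first renamed to
# "xenvg/<uuid>.disk0_data_replaced-<timestamp>" via ren_fn below, and the
# freshly created LV is then renamed to the original name before being
# re-attached to the DRBD device.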
7833 temp_suffix = int(time.time())
7834 ren_fn = lambda d, suff: (d.physical_id[0],
7835 d.physical_id[1] + "_replaced-%s" % suff)
7837 # Build the rename list based on what LVs exist on the node
7838 rename_old_to_new = []
7839 for to_ren in old_lvs:
7840 result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7841 if not result.fail_msg and result.payload:
7843 rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7845 self.lu.LogInfo("Renaming the old LVs on the target node")
7846 result = self.rpc.call_blockdev_rename(self.target_node,
7848 result.Raise("Can't rename old LVs on node %s" % self.target_node)
7850 # Now we rename the new LVs to the old LVs
7851 self.lu.LogInfo("Renaming the new LVs on the target node")
7852 rename_new_to_old = [(new, old.physical_id)
7853 for old, new in zip(old_lvs, new_lvs)]
7854 result = self.rpc.call_blockdev_rename(self.target_node,
7856 result.Raise("Can't rename new LVs on node %s" % self.target_node)
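# At this point the on-disk swap is complete: the new LVs carry the original
# names and the old LVs carry the _replaced suffix; the config objects are
# updated below to match.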
7858 for old, new in zip(old_lvs, new_lvs):
7859 new.logical_id = old.logical_id
7860 self.cfg.SetDiskID(new, self.target_node)
7862 for disk in old_lvs:
7863 disk.logical_id = ren_fn(disk, temp_suffix)
7864 self.cfg.SetDiskID(disk, self.target_node)
7866 # Now that the new lvs have the old name, we can add them to the device
7867 self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7868 result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7870 msg = result.fail_msg
7872 for new_lv in new_lvs:
7873 msg2 = self.rpc.call_blockdev_remove(self.target_node,
7876 self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7877 hint=("cleanup manually the unused logical"
7879 raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7881 dev.children = new_lvs
7883 self.cfg.Update(self.instance, feedback_fn)
7886 if self.early_release:
7887 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7889 self._RemoveOldStorage(self.target_node, iv_names)
7890 # WARNING: we release both node locks here, do not do other RPCs
7891 # than WaitForSync to the primary node
7892 self._ReleaseNodeLock([self.target_node, self.other_node])
7895 # This can fail as the old devices are degraded and _WaitForSync
7896 # does a combined result over all disks, so we don't check its return value
7897 self.lu.LogStep(cstep, steps_total, "Sync devices")
7899 _WaitForSync(self.lu, self.instance)
7901 # Check all devices manually
7902 self._CheckDevices(self.instance.primary_node, iv_names)
7904 # Step: remove old storage
7905 if not self.early_release:
7906 self.lu.LogStep(cstep, steps_total, "Removing old storage")
7908 self._RemoveOldStorage(self.target_node, iv_names)
7910 def _ExecDrbd8Secondary(self, feedback_fn):
7911 """Replace the secondary node for DRBD 8.
7913 The algorithm for replace is quite complicated:
7914 - for all disks of the instance:
7915 - create new LVs on the new node with same names
7916 - shutdown the drbd device on the old secondary
7917 - disconnect the drbd network on the primary
7918 - create the drbd device on the new secondary
7919 - network attach the drbd on the primary, using an artifice:
7920 the drbd code for Attach() will connect to the network if it
7921 finds a device which is connected to the good local disks but
7922 not network enabled
7923 - wait for sync across all devices
7924 - remove all disks from the old secondary
7926 Failures are not very well handled.
7929 steps_total = 6
7931 # Step: check device activation
7932 self.lu.LogStep(1, steps_total, "Check device existence")
7933 self._CheckDisksExistence([self.instance.primary_node])
7934 self._CheckVolumeGroup([self.instance.primary_node])
7936 # Step: check other node consistency
7937 self.lu.LogStep(2, steps_total, "Check peer consistency")
7938 self._CheckDisksConsistency(self.instance.primary_node, True, True)
7940 # Step: create new storage
7941 self.lu.LogStep(3, steps_total, "Allocate new storage")
7942 for idx, dev in enumerate(self.instance.disks):
7943 self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7944 (self.new_node, idx))
7945 # we pass force_create=True to force LVM creation
7946 for new_lv in dev.children:
7947 _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7948 _GetInstanceInfoText(self.instance), False)
7950 # Step 4: drbd minors and drbd setup changes
7951 # after this, we must manually remove the drbd minors on both the
7952 # error and the success paths
7953 self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7954 minors = self.cfg.AllocateDRBDMinor([self.new_node
7955 for dev in self.instance.disks],
7957 logging.debug("Allocated minors %r", minors)
7960 for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7961 self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
7962 (self.new_node, idx))
7963 # create new devices on new_node; note that we create two IDs:
7964 # one without port, so the drbd will be activated without
7965 # networking information on the new node at this stage, and one
7966 # with network, for the later activation in step 4
7967 (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
7968 if self.instance.primary_node == o_node1:
7969 p_minor = o_minor1
7970 else:
7971 assert self.instance.primary_node == o_node2, "Three-node instance?"
7972 p_minor = o_minor2
7974 new_alone_id = (self.instance.primary_node, self.new_node, None,
7975 p_minor, new_minor, o_secret)
7976 new_net_id = (self.instance.primary_node, self.new_node, o_port,
7977 p_minor, new_minor, o_secret)
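# For DRBD8 the logical_id is the tuple (node_A, node_B, port, minor_A,
# minor_B, secret), as unpacked above; new_alone_id leaves the port as None so
# the device is first brought up standalone, while new_net_id carries the port
# for the later network attach.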
7979 iv_names[idx] = (dev, dev.children, new_net_id)
7980 logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
7982 new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
7983 logical_id=new_alone_id,
7984 children=dev.children,
7987 _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
7988 _GetInstanceInfoText(self.instance), False)
7989 except errors.GenericError:
7990 self.cfg.ReleaseDRBDMinors(self.instance.name)
7993 # We have new devices, shutdown the drbd on the old secondary
7994 for idx, dev in enumerate(self.instance.disks):
7995 self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
7996 self.cfg.SetDiskID(dev, self.target_node)
7997 msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
7998 if msg:
7999 self.lu.LogWarning("Failed to shut down drbd for disk/%d on old"
8000 " node: %s" % (idx, msg),
8001 hint=("Please cleanup this device manually as"
8002 " soon as possible"))
8004 self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
8005 result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
8006 self.node_secondary_ip,
8007 self.instance.disks)\
8008 [self.instance.primary_node]
8010 msg = result.fail_msg
8012 # detaches didn't succeed (unlikely)
8013 self.cfg.ReleaseDRBDMinors(self.instance.name)
8014 raise errors.OpExecError("Can't detach the disks from the network on"
8015 " old node: %s" % (msg,))
8017 # if we managed to detach at least one, we update all the disks of
8018 # the instance to point to the new secondary
8019 self.lu.LogInfo("Updating instance configuration")
8020 for dev, _, new_logical_id in iv_names.itervalues():
8021 dev.logical_id = new_logical_id
8022 self.cfg.SetDiskID(dev, self.instance.primary_node)
8024 self.cfg.Update(self.instance, feedback_fn)
8026 # and now perform the drbd attach
8027 self.lu.LogInfo("Attaching primary drbds to new secondary"
8028 " (standalone => connected)")
8029 result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
8031 self.node_secondary_ip,
8032 self.instance.disks,
8035 for to_node, to_result in result.items():
8036 msg = to_result.fail_msg
8038 self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
8040 hint=("please do a gnt-instance info to see the"
8041 " status of disks"))
8043 if self.early_release:
8044 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8046 self._RemoveOldStorage(self.target_node, iv_names)
8047 # WARNING: we release all node locks here, do not do other RPCs
8048 # than WaitForSync to the primary node
8049 self._ReleaseNodeLock([self.instance.primary_node,
8054 # This can fail as the old devices are degraded and _WaitForSync
8055 # does a combined result over all disks, so we don't check its return value
8056 self.lu.LogStep(cstep, steps_total, "Sync devices")
8058 _WaitForSync(self.lu, self.instance)
8060 # Check all devices manually
8061 self._CheckDevices(self.instance.primary_node, iv_names)
8063 # Step: remove old storage
8064 if not self.early_release:
8065 self.lu.LogStep(cstep, steps_total, "Removing old storage")
8066 self._RemoveOldStorage(self.target_node, iv_names)
8069 class LURepairNodeStorage(NoHooksLU):
8070 """Repairs the volume group on a node.
8073 _OP_REQP = ["node_name"]
8076 def CheckArguments(self):
8077 self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
8079 _CheckStorageType(self.op.storage_type)
8081 def ExpandNames(self):
8082 self.needed_locks = {
8083 locking.LEVEL_NODE: [self.op.node_name],
8086 def _CheckFaultyDisks(self, instance, node_name):
8087 """Ensure faulty disks abort the opcode or at least warn."""
8089 if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
8091 raise errors.OpPrereqError("Instance '%s' has faulty disks on"
8092 " node '%s'" % (instance.name, node_name),
8094 except errors.OpPrereqError, err:
8095 if self.op.ignore_consistency:
8096 self.proc.LogWarning(str(err.args[0]))
8100 def CheckPrereq(self):
8101 """Check prerequisites.
8104 storage_type = self.op.storage_type
8106 if (constants.SO_FIX_CONSISTENCY not in
8107 constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
8108 raise errors.OpPrereqError("Storage units of type '%s' can not be"
8109 " repaired" % storage_type,
8112 # Check whether any instance on this node has faulty disks
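# We check the peer nodes of every running instance on the node being
# repaired: if a peer already has faulty disks, repairing this node could
# touch the only good copy, so the opcode aborts (or only warns when
# ignore_consistency is set).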
8113 for inst in _GetNodeInstances(self.cfg, self.op.node_name):
8114 if not inst.admin_up:
8116 check_nodes = set(inst.all_nodes)
8117 check_nodes.discard(self.op.node_name)
8118 for inst_node_name in check_nodes:
8119 self._CheckFaultyDisks(inst, inst_node_name)
8121 def Exec(self, feedback_fn):
8122 feedback_fn("Repairing storage unit '%s' on %s ..." %
8123 (self.op.name, self.op.node_name))
8125 st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8126 result = self.rpc.call_storage_execute(self.op.node_name,
8127 self.op.storage_type, st_args,
8129 constants.SO_FIX_CONSISTENCY)
8130 result.Raise("Failed to repair storage unit '%s' on %s" %
8131 (self.op.name, self.op.node_name))
8134 class LUNodeEvacuationStrategy(NoHooksLU):
8135 """Computes the node evacuation strategy.
8138 _OP_REQP = ["nodes"]
8140 ("remote_node", None),
8141 ("iallocator", None),
8145 def CheckArguments(self):
8146 if self.op.remote_node is not None and self.op.iallocator is not None:
8147 raise errors.OpPrereqError("Give either the iallocator or the new"
8148 " secondary, not both", errors.ECODE_INVAL)
8150 def ExpandNames(self):
8151 self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8152 self.needed_locks = locks = {}
8153 if self.op.remote_node is None:
8154 locks[locking.LEVEL_NODE] = locking.ALL_SET
8156 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8157 locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8159 def CheckPrereq(self):
8162 def Exec(self, feedback_fn):
8163 if self.op.remote_node is not None:
8165 for node in self.op.nodes:
8166 instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8169 if i.primary_node == self.op.remote_node:
8170 raise errors.OpPrereqError("Node %s is the primary node of"
8171 " instance %s, cannot use it as"
8173 (self.op.remote_node, i.name),
8175 result.append([i.name, self.op.remote_node])
8177 ial = IAllocator(self.cfg, self.rpc,
8178 mode=constants.IALLOCATOR_MODE_MEVAC,
8179 evac_nodes=self.op.nodes)
8180 ial.Run(self.op.iallocator, validate=True)
8182 raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8188 class LUGrowDisk(LogicalUnit):
8189 """Grow a disk of an instance.
8193 HTYPE = constants.HTYPE_INSTANCE
8194 _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
8197 def ExpandNames(self):
8198 self._ExpandAndLockInstance()
8199 self.needed_locks[locking.LEVEL_NODE] = []
8200 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8202 def DeclareLocks(self, level):
8203 if level == locking.LEVEL_NODE:
8204 self._LockInstancesNodes()
8206 def BuildHooksEnv(self):
8209 This runs on the master, the primary and all the secondaries.
8213 "DISK": self.op.disk,
8214 "AMOUNT": self.op.amount,
8216 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8217 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8220 def CheckPrereq(self):
8221 """Check prerequisites.
8223 This checks that the instance is in the cluster.
8226 instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8227 assert instance is not None, \
8228 "Cannot retrieve locked instance %s" % self.op.instance_name
8229 nodenames = list(instance.all_nodes)
8230 for node in nodenames:
8231 _CheckNodeOnline(self, node)
8234 self.instance = instance
8236 if instance.disk_template not in constants.DTS_GROWABLE:
8237 raise errors.OpPrereqError("Instance's disk layout does not support"
8238 " growing.", errors.ECODE_INVAL)
8240 self.disk = instance.FindDisk(self.op.disk)
8242 if instance.disk_template != constants.DT_FILE:
8243 # TODO: check the free disk space for file, when that feature will be
8244 # supported
8245 _CheckNodesFreeDisk(self, nodenames, self.op.amount)
8247 def Exec(self, feedback_fn):
8248 """Execute disk grow.
8251 instance = self.instance
8254 disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8256 raise errors.OpExecError("Cannot activate block device to grow")
8258 for node in instance.all_nodes:
8259 self.cfg.SetDiskID(disk, node)
8260 result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8261 result.Raise("Grow request failed to node %s" % node)
8263 # TODO: Rewrite code to work properly
8264 # DRBD goes into sync mode for a short amount of time after executing the
8265 # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8266 # calling "resize" in sync mode fails. Sleeping for a short amount of
8267 # time is a work-around.
8268 time.sleep(5)
8270 disk.RecordGrow(self.op.amount)
8271 self.cfg.Update(instance, feedback_fn)
8272 if self.op.wait_for_sync:
8273 disk_abort = not _WaitForSync(self, instance, disks=[disk])
8275 self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8276 " status.\nPlease check the instance.")
8277 if not instance.admin_up:
8278 _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8279 elif not instance.admin_up:
8280 self.proc.LogWarning("Not shutting down the disk even though the instance"
8281 " is not supposed to be running, because no wait"
8282 " for sync was requested.")
8285 class LUQueryInstanceData(NoHooksLU):
8286 """Query runtime instance data.
8289 _OP_REQP = ["instances", "static"]
8292 def CheckArguments(self):
8293 if not isinstance(self.op.instances, list):
8294 raise errors.OpPrereqError("Invalid argument type 'instances'",
8297 def ExpandNames(self):
8298 self.needed_locks = {}
8299 self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8301 if self.op.instances:
8302 self.wanted_names = []
8303 for name in self.op.instances:
8304 full_name = _ExpandInstanceName(self.cfg, name)
8305 self.wanted_names.append(full_name)
8306 self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8308 self.wanted_names = None
8309 self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8311 self.needed_locks[locking.LEVEL_NODE] = []
8312 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8314 def DeclareLocks(self, level):
8315 if level == locking.LEVEL_NODE:
8316 self._LockInstancesNodes()
8318 def CheckPrereq(self):
8319 """Check prerequisites.
8321 This only checks the optional instance list against the existing names.
8324 if self.wanted_names is None:
8325 self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8327 self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8328 in self.wanted_names]
8331 def _ComputeBlockdevStatus(self, node, instance_name, dev):
8332 """Returns the status of a block device
8335 if self.op.static or not node:
8338 self.cfg.SetDiskID(dev, node)
8340 result = self.rpc.call_blockdev_find(node, dev)
8344 result.Raise("Can't compute disk status for %s" % instance_name)
8346 status = result.payload
8350 return (status.dev_path, status.major, status.minor,
8351 status.sync_percent, status.estimated_time,
8352 status.is_degraded, status.ldisk_status)
8354 def _ComputeDiskStatus(self, instance, snode, dev):
8355 """Compute block device status.
8358 if dev.dev_type in constants.LDS_DRBD:
8359 # we change the snode then (otherwise we use the one passed in)
8360 if dev.logical_id[0] == instance.primary_node:
8361 snode = dev.logical_id[1]
8363 snode = dev.logical_id[0]
8365 dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8367 dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8370 dev_children = [self._ComputeDiskStatus(instance, snode, child)
8371 for child in dev.children]
8376 "iv_name": dev.iv_name,
8377 "dev_type": dev.dev_type,
8378 "logical_id": dev.logical_id,
8379 "physical_id": dev.physical_id,
8380 "pstatus": dev_pstatus,
8381 "sstatus": dev_sstatus,
8382 "children": dev_children,
8389 def Exec(self, feedback_fn):
8390 """Gather and return data"""
8393 cluster = self.cfg.GetClusterInfo()
8395 for instance in self.wanted_instances:
8396 if not self.op.static:
8397 remote_info = self.rpc.call_instance_info(instance.primary_node,
8399 instance.hypervisor)
8400 remote_info.Raise("Error checking node %s" % instance.primary_node)
8401 remote_info = remote_info.payload
8402 if remote_info and "state" in remote_info:
8405 remote_state = "down"
8408 if instance.admin_up:
8411 config_state = "down"
8413 disks = [self._ComputeDiskStatus(instance, None, device)
8414 for device in instance.disks]
8417 "name": instance.name,
8418 "config_state": config_state,
8419 "run_state": remote_state,
8420 "pnode": instance.primary_node,
8421 "snodes": instance.secondary_nodes,
8423 # this happens to be the same format used for hooks
8424 "nics": _NICListToTuple(self, instance.nics),
8425 "disk_template": instance.disk_template,
8427 "hypervisor": instance.hypervisor,
8428 "network_port": instance.network_port,
8429 "hv_instance": instance.hvparams,
8430 "hv_actual": cluster.FillHV(instance, skip_globals=True),
8431 "be_instance": instance.beparams,
8432 "be_actual": cluster.FillBE(instance),
8433 "os_instance": instance.osparams,
8434 "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8435 "serial_no": instance.serial_no,
8436 "mtime": instance.mtime,
8437 "ctime": instance.ctime,
8438 "uuid": instance.uuid,
8441 result[instance.name] = idict
8446 class LUSetInstanceParams(LogicalUnit):
8447 """Modifies an instance's parameters.
8450 HPATH = "instance-modify"
8451 HTYPE = constants.HTYPE_INSTANCE
8452 _OP_REQP = ["instance_name"]
8454 ("nics", _EmptyList),
8455 ("disks", _EmptyList),
8456 ("beparams", _EmptyDict),
8457 ("hvparams", _EmptyDict),
8458 ("disk_template", None),
8459 ("remote_node", None),
8461 ("force_variant", False),
8467 def CheckArguments(self):
8468 if not (self.op.nics or self.op.disks or self.op.disk_template or
8469 self.op.hvparams or self.op.beparams or self.op.os_name):
8470 raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8472 if self.op.hvparams:
8473 _CheckGlobalHvParams(self.op.hvparams)
8477 for disk_op, disk_dict in self.op.disks:
8478 utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8479 if disk_op == constants.DDM_REMOVE:
8482 elif disk_op == constants.DDM_ADD:
8485 if not isinstance(disk_op, int):
8486 raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8487 if not isinstance(disk_dict, dict):
8488 msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8489 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8491 if disk_op == constants.DDM_ADD:
8492 mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8493 if mode not in constants.DISK_ACCESS_SET:
8494 raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8496 size = disk_dict.get('size', None)
8498 raise errors.OpPrereqError("Required disk parameter size missing",
8502 except (TypeError, ValueError), err:
8503 raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8504 str(err), errors.ECODE_INVAL)
8505 disk_dict['size'] = size
8507 # modification of disk
8508 if 'size' in disk_dict:
8509 raise errors.OpPrereqError("Disk size change not possible, use"
8510 " grow-disk", errors.ECODE_INVAL)
8512 if disk_addremove > 1:
8513 raise errors.OpPrereqError("Only one disk add or remove operation"
8514 " supported at a time", errors.ECODE_INVAL)
8516 if self.op.disks and self.op.disk_template is not None:
8517 raise errors.OpPrereqError("Disk template conversion and other disk"
8518 " changes not supported at the same time",
8521 if self.op.disk_template:
8522 _CheckDiskTemplate(self.op.disk_template)
8523 if (self.op.disk_template in constants.DTS_NET_MIRROR and
8524 self.op.remote_node is None):
8525 raise errors.OpPrereqError("Changing the disk template to a mirrored"
8526 " one requires specifying a secondary node",
8531 for nic_op, nic_dict in self.op.nics:
8532 utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8533 if nic_op == constants.DDM_REMOVE:
8536 elif nic_op == constants.DDM_ADD:
8539 if not isinstance(nic_op, int):
8540 raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8541 if not isinstance(nic_dict, dict):
8542 msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8543 raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8545 # nic_dict should be a dict
8546 nic_ip = nic_dict.get('ip', None)
8547 if nic_ip is not None:
8548 if nic_ip.lower() == constants.VALUE_NONE:
8549 nic_dict['ip'] = None
8551 if not utils.IsValidIP(nic_ip):
8552 raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8555 nic_bridge = nic_dict.get('bridge', None)
8556 nic_link = nic_dict.get('link', None)
8557 if nic_bridge and nic_link:
8558 raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8559 " at the same time", errors.ECODE_INVAL)
8560 elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8561 nic_dict['bridge'] = None
8562 elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8563 nic_dict['link'] = None
8565 if nic_op == constants.DDM_ADD:
8566 nic_mac = nic_dict.get('mac', None)
8568 nic_dict['mac'] = constants.VALUE_AUTO
8570 if 'mac' in nic_dict:
8571 nic_mac = nic_dict['mac']
8572 if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8573 nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8575 if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8576 raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8577 " modifying an existing nic",
8580 if nic_addremove > 1:
8581 raise errors.OpPrereqError("Only one NIC add or remove operation"
8582 " supported at a time", errors.ECODE_INVAL)
8584 def ExpandNames(self):
8585 self._ExpandAndLockInstance()
8586 self.needed_locks[locking.LEVEL_NODE] = []
8587 self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8589 def DeclareLocks(self, level):
8590 if level == locking.LEVEL_NODE:
8591 self._LockInstancesNodes()
8592 if self.op.disk_template and self.op.remote_node:
8593 self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8594 self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8596 def BuildHooksEnv(self):
8599 This runs on the master, primary and secondaries.
8603 if constants.BE_MEMORY in self.be_new:
8604 args['memory'] = self.be_new[constants.BE_MEMORY]
8605 if constants.BE_VCPUS in self.be_new:
8606 args['vcpus'] = self.be_new[constants.BE_VCPUS]
8607 # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8608 # information at all.
8611 nic_override = dict(self.op.nics)
8612 for idx, nic in enumerate(self.instance.nics):
8613 if idx in nic_override:
8614 this_nic_override = nic_override[idx]
8616 this_nic_override = {}
8617 if 'ip' in this_nic_override:
8618 ip = this_nic_override['ip']
8621 if 'mac' in this_nic_override:
8622 mac = this_nic_override['mac']
8625 if idx in self.nic_pnew:
8626 nicparams = self.nic_pnew[idx]
8628 nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
8629 mode = nicparams[constants.NIC_MODE]
8630 link = nicparams[constants.NIC_LINK]
8631 args['nics'].append((ip, mac, mode, link))
8632 if constants.DDM_ADD in nic_override:
8633 ip = nic_override[constants.DDM_ADD].get('ip', None)
8634 mac = nic_override[constants.DDM_ADD]['mac']
8635 nicparams = self.nic_pnew[constants.DDM_ADD]
8636 mode = nicparams[constants.NIC_MODE]
8637 link = nicparams[constants.NIC_LINK]
8638 args['nics'].append((ip, mac, mode, link))
8639 elif constants.DDM_REMOVE in nic_override:
8640 del args['nics'][-1]
8642 env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8643 if self.op.disk_template:
8644 env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8645 nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8648 def CheckPrereq(self):
8649 """Check prerequisites.
8651 This only checks the instance list against the existing names.
8654 # checking the new params on the primary/secondary nodes
8656 instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8657 cluster = self.cluster = self.cfg.GetClusterInfo()
8658 assert self.instance is not None, \
8659 "Cannot retrieve locked instance %s" % self.op.instance_name
8660 pnode = instance.primary_node
8661 nodelist = list(instance.all_nodes)
8664 if self.op.os_name and not self.op.force:
8665 _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8666 self.op.force_variant)
8667 instance_os = self.op.os_name
8669 instance_os = instance.os
8671 if self.op.disk_template:
8672 if instance.disk_template == self.op.disk_template:
8673 raise errors.OpPrereqError("Instance already has disk template %s" %
8674 instance.disk_template, errors.ECODE_INVAL)
8676 if (instance.disk_template,
8677 self.op.disk_template) not in self._DISK_CONVERSIONS:
8678 raise errors.OpPrereqError("Unsupported disk template conversion from"
8679 " %s to %s" % (instance.disk_template,
8680 self.op.disk_template),
8682 if self.op.disk_template in constants.DTS_NET_MIRROR:
8683 _CheckNodeOnline(self, self.op.remote_node)
8684 _CheckNodeNotDrained(self, self.op.remote_node)
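# For a conversion to a mirrored template the future secondary must hold a
# full copy of every disk, so compute the required space from the current
# disk sizes and verify it is free on the remote node before continuing.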
8685 disks = [{"size": d.size} for d in instance.disks]
8686 required = _ComputeDiskSize(self.op.disk_template, disks)
8687 _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8688 _CheckInstanceDown(self, instance, "cannot change disk template")
8690 # hvparams processing
8691 if self.op.hvparams:
8692 hv_type = instance.hypervisor
8693 i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
8694 utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
8695 hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
8698 hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
8699 _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8700 self.hv_new = hv_new # the new actual values
8701 self.hv_inst = i_hvdict # the new dict (without defaults)
8703 self.hv_new = self.hv_inst = {}
8705 # beparams processing
8706 if self.op.beparams:
8707 i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
8709 utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
8710 be_new = cluster.SimpleFillBE(i_bedict)
8711 self.be_new = be_new # the new actual values
8712 self.be_inst = i_bedict # the new dict (without defaults)
8714 self.be_new = self.be_inst = {}
8716 # osparams processing
8717 if self.op.osparams:
8718 i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
8719 _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
8720 self.os_new = cluster.SimpleFillOS(instance_os, i_osdict)
8721 self.os_inst = i_osdict # the new dict (without defaults)
8723 self.os_new = self.os_inst = {}
8725 self.warn = []
8727 if constants.BE_MEMORY in self.op.beparams and not self.op.force:
8728 mem_check_list = [pnode]
8729 if be_new[constants.BE_AUTO_BALANCE]:
8730 # either we changed auto_balance to yes or it was from before
8731 mem_check_list.extend(instance.secondary_nodes)
8732 instance_info = self.rpc.call_instance_info(pnode, instance.name,
8733 instance.hypervisor)
8734 nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8735 instance.hypervisor)
8736 pninfo = nodeinfo[pnode]
8737 msg = pninfo.fail_msg
8739 # Assume the primary node is unreachable and go ahead
8740 self.warn.append("Can't get info from primary node %s: %s" %
8742 elif not isinstance(pninfo.payload.get('memory_free', None), int):
8743 self.warn.append("Node data from primary node %s doesn't contain"
8744 " free memory information" % pnode)
8745 elif instance_info.fail_msg:
8746 self.warn.append("Can't get instance runtime information: %s" %
8747 instance_info.fail_msg)
8749 if instance_info.payload:
8750 current_mem = int(instance_info.payload['memory'])
8752 # Assume instance not running
8753 # (there is a slight race condition here, but it's not very probable,
8754 # and we have no other way to check)
8756 miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8757 pninfo.payload['memory_free'])
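# A positive miss_mem means the primary cannot cover the requested memory
# increase even after accounting for what the instance currently uses, so
# the change is refused below (this whole check is skipped with --force).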
8759 raise errors.OpPrereqError("This change will prevent the instance"
8760 " from starting, due to %d MB of memory"
8761 " missing on its primary node" % miss_mem,
8764 if be_new[constants.BE_AUTO_BALANCE]:
8765 for node, nres in nodeinfo.items():
8766 if node not in instance.secondary_nodes:
8770 self.warn.append("Can't get info from secondary node %s: %s" %
8772 elif not isinstance(nres.payload.get('memory_free', None), int):
8773 self.warn.append("Secondary node %s didn't return free"
8774 " memory information" % node)
8775 elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8776 self.warn.append("Not enough memory to failover instance to"
8777 " secondary node %s" % node)
8779 # NIC processing
8780 self.nic_pinst = {}
8781 self.nic_pnew = {}
8782 for nic_op, nic_dict in self.op.nics:
8783 if nic_op == constants.DDM_REMOVE:
8784 if not instance.nics:
8785 raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8788 if nic_op != constants.DDM_ADD:
8790 if not instance.nics:
8791 raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8792 " no NICs" % nic_op,
8794 if nic_op < 0 or nic_op >= len(instance.nics):
8795 raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8797 (nic_op, len(instance.nics) - 1),
8799 old_nic_params = instance.nics[nic_op].nicparams
8800 old_nic_ip = instance.nics[nic_op].ip
8805 update_params_dict = dict([(key, nic_dict[key])
8806 for key in constants.NICS_PARAMETERS
8807 if key in nic_dict])
8809 if 'bridge' in nic_dict:
8810 update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8812 new_nic_params = _GetUpdatedParams(old_nic_params,
8814 utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
8815 new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
8816 objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8817 self.nic_pinst[nic_op] = new_nic_params
8818 self.nic_pnew[nic_op] = new_filled_nic_params
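# nic_pinst keeps only the per-instance overrides (what ends up stored in the
# configuration), while nic_pnew is the fully filled dict used for validation
# here and for the hook environment in BuildHooksEnv.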
8819 new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8821 if new_nic_mode == constants.NIC_MODE_BRIDGED:
8822 nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8823 msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8825 msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8827 self.warn.append(msg)
8829 raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8830 if new_nic_mode == constants.NIC_MODE_ROUTED:
8831 if 'ip' in nic_dict:
8832 nic_ip = nic_dict['ip']
8836 raise errors.OpPrereqError('Cannot set the nic ip to None'
8837 ' on a routed nic', errors.ECODE_INVAL)
8838 if 'mac' in nic_dict:
8839 nic_mac = nic_dict['mac']
8841 raise errors.OpPrereqError('Cannot set the nic mac to None',
8843 elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8844 # otherwise generate the mac
8845 nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8847 # or validate/reserve the current one
8849 self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8850 except errors.ReservationError:
8851 raise errors.OpPrereqError("MAC address %s already in use"
8852 " in cluster" % nic_mac,
8853 errors.ECODE_NOTUNIQUE)
8856 if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8857 raise errors.OpPrereqError("Disk operations not supported for"
8858 " diskless instances",
8860 for disk_op, _ in self.op.disks:
8861 if disk_op == constants.DDM_REMOVE:
8862 if len(instance.disks) == 1:
8863 raise errors.OpPrereqError("Cannot remove the last disk of"
8864 " an instance", errors.ECODE_INVAL)
8865 _CheckInstanceDown(self, instance, "cannot remove disks")
8867 if (disk_op == constants.DDM_ADD and
8868 len(instance.nics) >= constants.MAX_DISKS):
8869 raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8870 " add more" % constants.MAX_DISKS,
8872 if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8874 if disk_op < 0 or disk_op >= len(instance.disks):
8875 raise errors.OpPrereqError("Invalid disk index %s, valid values"
8877 (disk_op, len(instance.disks)),
8882 def _ConvertPlainToDrbd(self, feedback_fn):
8883 """Converts an instance from plain to drbd.
8886 feedback_fn("Converting template to drbd")
8887 instance = self.instance
8888 pnode = instance.primary_node
8889 snode = self.op.remote_node
8891 # create a fake disk info for _GenerateDiskTemplate
8892 disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8893 new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8894 instance.name, pnode, [snode],
8895 disk_info, None, None, 0)
8896 info = _GetInstanceInfoText(instance)
8897 feedback_fn("Creating additional volumes...")
8898 # first, create the missing data and meta devices
8899 for disk in new_disks:
8900 # unfortunately this is... not too nice
8901 _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
8903 for child in disk.children:
8904 _CreateSingleBlockDev(self, snode, instance, child, info, True)
8905 # at this stage, all new LVs have been created, we can rename the
8906 # old ones
8907 feedback_fn("Renaming original volumes...")
8908 rename_list = [(o, n.children[0].logical_id)
8909 for (o, n) in zip(instance.disks, new_disks)]
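# Each existing plain LV is renamed to the name generated for the data child
# of the corresponding new DRBD disk, so the instance's data is reused in
# place instead of being copied.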
8910 result = self.rpc.call_blockdev_rename(pnode, rename_list)
8911 result.Raise("Failed to rename original LVs")
8913 feedback_fn("Initializing DRBD devices...")
8914 # all child devices are in place, we can now create the DRBD devices
8915 for disk in new_disks:
8916 for node in [pnode, snode]:
8917 f_create = node == pnode
8918 _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
8920 # at this point, the instance has been modified
8921 instance.disk_template = constants.DT_DRBD8
8922 instance.disks = new_disks
8923 self.cfg.Update(instance, feedback_fn)
8925 # disks are created, waiting for sync
8926 disk_abort = not _WaitForSync(self, instance)
8928 raise errors.OpExecError("There are some degraded disks for"
8929 " this instance, please cleanup manually")
8931 def _ConvertDrbdToPlain(self, feedback_fn):
8932 """Converts an instance from drbd to plain.
8935 instance = self.instance
8936 assert len(instance.secondary_nodes) == 1
8937 pnode = instance.primary_node
8938 snode = instance.secondary_nodes[0]
8939 feedback_fn("Converting template to plain")
8941 old_disks = instance.disks
8942 new_disks = [d.children[0] for d in old_disks]
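# For DRBD8 each disk has two LV children: children[0] is the data LV, which
# becomes the plain disk, and children[1] is the metadata LV, which is
# removed further down.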
8944 # copy over size and mode
8945 for parent, child in zip(old_disks, new_disks):
8946 child.size = parent.size
8947 child.mode = parent.mode
8949 # update instance structure
8950 instance.disks = new_disks
8951 instance.disk_template = constants.DT_PLAIN
8952 self.cfg.Update(instance, feedback_fn)
8954 feedback_fn("Removing volumes on the secondary node...")
8955 for disk in old_disks:
8956 self.cfg.SetDiskID(disk, snode)
8957 msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
8959 self.LogWarning("Could not remove block device %s on node %s,"
8960 " continuing anyway: %s", disk.iv_name, snode, msg)
8962 feedback_fn("Removing unneeded volumes on the primary node...")
8963 for idx, disk in enumerate(old_disks):
8964 meta = disk.children[1]
8965 self.cfg.SetDiskID(meta, pnode)
8966 msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
8968 self.LogWarning("Could not remove metadata for disk %d on node %s,"
8969 " continuing anyway: %s", idx, pnode, msg)
8972 def Exec(self, feedback_fn):
8973 """Modifies an instance.
8975 All parameters take effect only at the next restart of the instance.
8978 # Process here the warnings from CheckPrereq, as we don't have a
8979 # feedback_fn there.
8980 for warn in self.warn:
8981 feedback_fn("WARNING: %s" % warn)
8983 result = []
8984 instance = self.instance
8985 # disk changes
8986 for disk_op, disk_dict in self.op.disks:
8987 if disk_op == constants.DDM_REMOVE:
8988 # remove the last disk
8989 device = instance.disks.pop()
8990 device_idx = len(instance.disks)
8991 for node, disk in device.ComputeNodeTree(instance.primary_node):
8992 self.cfg.SetDiskID(disk, node)
8993 msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
8995 self.LogWarning("Could not remove disk/%d on node %s: %s,"
8996 " continuing anyway", device_idx, node, msg)
8997 result.append(("disk/%d" % device_idx, "remove"))
8998 elif disk_op == constants.DDM_ADD:
9000 if instance.disk_template == constants.DT_FILE:
9001 file_driver, file_path = instance.disks[0].logical_id
9002 file_path = os.path.dirname(file_path)
9004 file_driver = file_path = None
9005 disk_idx_base = len(instance.disks)
9006 new_disk = _GenerateDiskTemplate(self,
9007 instance.disk_template,
9008 instance.name, instance.primary_node,
9009 instance.secondary_nodes,
9014 instance.disks.append(new_disk)
9015 info = _GetInstanceInfoText(instance)
9017 logging.info("Creating volume %s for instance %s",
9018 new_disk.iv_name, instance.name)
9019 # Note: this needs to be kept in sync with _CreateDisks
9021 for node in instance.all_nodes:
9022 f_create = node == instance.primary_node
9024 _CreateBlockDev(self, node, instance, new_disk,
9025 f_create, info, f_create)
9026 except errors.OpExecError, err:
9027 self.LogWarning("Failed to create volume %s (%s) on"
9029 new_disk.iv_name, new_disk, node, err)
9030 result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
9031 (new_disk.size, new_disk.mode)))
9033 # change a given disk
9034 instance.disks[disk_op].mode = disk_dict['mode']
9035 result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
9037 if self.op.disk_template:
9038 r_shut = _ShutdownInstanceDisks(self, instance)
9040 raise errors.OpExecError("Cannot shut down instance disks, unable to"
9041 " proceed with disk template conversion")
9042 mode = (instance.disk_template, self.op.disk_template)
9044 self._DISK_CONVERSIONS[mode](self, feedback_fn)
9046 self.cfg.ReleaseDRBDMinors(instance.name)
9048 result.append(("disk_template", self.op.disk_template))
9051 for nic_op, nic_dict in self.op.nics:
9052 if nic_op == constants.DDM_REMOVE:
9053 # remove the last nic
9054 del instance.nics[-1]
9055 result.append(("nic.%d" % len(instance.nics), "remove"))
9056 elif nic_op == constants.DDM_ADD:
9057 # mac and bridge should be set, by now
9058 mac = nic_dict['mac']
9059 ip = nic_dict.get('ip', None)
9060 nicparams = self.nic_pinst[constants.DDM_ADD]
9061 new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
9062 instance.nics.append(new_nic)
9063 result.append(("nic.%d" % (len(instance.nics) - 1),
9064 "add:mac=%s,ip=%s,mode=%s,link=%s" %
9065 (new_nic.mac, new_nic.ip,
9066 self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
9067 self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
9070 for key in 'mac', 'ip':
9072 setattr(instance.nics[nic_op], key, nic_dict[key])
9073 if nic_op in self.nic_pinst:
9074 instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
9075 for key, val in nic_dict.iteritems():
9076 result.append(("nic.%s/%d" % (key, nic_op), val))
9079 if self.op.hvparams:
9080 instance.hvparams = self.hv_inst
9081 for key, val in self.op.hvparams.iteritems():
9082 result.append(("hv/%s" % key, val))
9085 if self.op.beparams:
9086 instance.beparams = self.be_inst
9087 for key, val in self.op.beparams.iteritems():
9088 result.append(("be/%s" % key, val))
9092 instance.os = self.op.os_name
9095 if self.op.osparams:
9096 instance.osparams = self.os_inst
9097 for key, val in self.op.osparams.iteritems():
9098 result.append(("os/%s" % key, val))
9100 self.cfg.Update(instance, feedback_fn)
9102 return result
9104 _DISK_CONVERSIONS = {
9105 (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
9106 (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
9110 class LUQueryExports(NoHooksLU):
9111 """Query the exports list
9114 _OP_REQP = ["nodes"]
9117 def ExpandNames(self):
9118 self.needed_locks = {}
9119 self.share_locks[locking.LEVEL_NODE] = 1
9120 if not self.op.nodes:
9121 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9123 self.needed_locks[locking.LEVEL_NODE] = \
9124 _GetWantedNodes(self, self.op.nodes)
9126 def CheckPrereq(self):
9127 """Check prerequisites.
9130 self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9132 def Exec(self, feedback_fn):
9133 """Compute the list of all the exported system images.
9136 @return: a dictionary with the structure node->(export-list)
9137 where export-list is a list of the instances exported on
9138 that node.
9141 rpcresult = self.rpc.call_export_list(self.nodes)
9142 result = {}
9143 for node in rpcresult:
9144 if rpcresult[node].fail_msg:
9145 result[node] = False
9147 result[node] = rpcresult[node].payload
9152 class LUPrepareExport(NoHooksLU):
9153 """Prepares an instance for an export and returns useful information.
9156 _OP_REQP = ["instance_name", "mode"]
9159 def CheckArguments(self):
9160 """Check the arguments.
9163 if self.op.mode not in constants.EXPORT_MODES:
9164 raise errors.OpPrereqError("Invalid export mode %r" % self.op.mode,
9167 def ExpandNames(self):
9168 self._ExpandAndLockInstance()
9170 def CheckPrereq(self):
9171 """Check prerequisites.
9174 instance_name = self.op.instance_name
9176 self.instance = self.cfg.GetInstanceInfo(instance_name)
9177 assert self.instance is not None, \
9178 "Cannot retrieve locked instance %s" % self.op.instance_name
9179 _CheckNodeOnline(self, self.instance.primary_node)
9181 self._cds = _GetClusterDomainSecret()
9183 def Exec(self, feedback_fn):
9184 """Prepares an instance for an export.
9187 instance = self.instance
9189 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9190 salt = utils.GenerateSecret(8)
9192 feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9193 result = self.rpc.call_x509_cert_create(instance.primary_node,
9194 constants.RIE_CERT_VALIDITY)
9195 result.Raise("Can't create X509 key and certificate on %s" % result.node)
9197 (name, cert_pem) = result.payload
9199 cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9203 "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9204 "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9206 "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9212 class LUExportInstance(LogicalUnit):
9213 """Export an instance to an image in the cluster.
9216 HPATH = "instance-export"
9217 HTYPE = constants.HTYPE_INSTANCE
9218 _OP_REQP = ["instance_name", "target_node", "shutdown"]
9220 ("shutdown_timeout", constants.DEFAULT_SHUTDOWN_TIMEOUT),
9221 ("remove_instance", False),
9222 ("ignore_remove_failures", False),
9223 ("mode", constants.EXPORT_MODE_LOCAL),
9224 ("x509_key_name", None),
9225 ("destination_x509_ca", None),
9229 def CheckArguments(self):
9230 """Check the arguments.
9233 self.x509_key_name = self.op.x509_key_name
9234 self.dest_x509_ca_pem = self.op.destination_x509_ca
9236 if self.op.remove_instance and not self.op.shutdown:
9237 raise errors.OpPrereqError("Can not remove instance without shutting it"
9240 if self.op.mode not in constants.EXPORT_MODES:
9241 raise errors.OpPrereqError("Invalid export mode %r" % self.op.mode,
9244 if self.op.mode == constants.EXPORT_MODE_REMOTE:
9245 if not self.x509_key_name:
9246 raise errors.OpPrereqError("Missing X509 key name for encryption",
9249 if not self.dest_x509_ca_pem:
9250 raise errors.OpPrereqError("Missing destination X509 CA",
9253 def ExpandNames(self):
9254 self._ExpandAndLockInstance()
9256 # Lock all nodes for local exports
9257 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9258 # FIXME: lock only instance primary and destination node
9260 # Sad but true, for now we have to lock all nodes, as we don't know where
9261 # the previous export might be, and in this LU we search for it and
9262 # remove it from its current node. In the future we could fix this by:
9263 # - making a tasklet to search (share-lock all), then create the
9264 # new one, then one to remove the old export afterwards
9265 # - removing the removal operation altogether
9266 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9268 def DeclareLocks(self, level):
9269 """Last minute lock declaration."""
9270 # All nodes are locked anyway, so nothing to do here.
9272 def BuildHooksEnv(self):
9275 This will run on the master, primary node and target node.
9279 "EXPORT_MODE": self.op.mode,
9280 "EXPORT_NODE": self.op.target_node,
9281 "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9282 "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
9283 # TODO: Generic function for boolean env variables
9284 "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
9287 env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9289 nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9291 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9292 nl.append(self.op.target_node)
9296 def CheckPrereq(self):
9297 """Check prerequisites.
9299 This checks that the instance and node names are valid.
9302 instance_name = self.op.instance_name
9304 self.instance = self.cfg.GetInstanceInfo(instance_name)
9305 assert self.instance is not None, \
9306 "Cannot retrieve locked instance %s" % self.op.instance_name
9307 _CheckNodeOnline(self, self.instance.primary_node)
9309 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9310 self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9311 self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9312 assert self.dst_node is not None
9314 _CheckNodeOnline(self, self.dst_node.name)
9315 _CheckNodeNotDrained(self, self.dst_node.name)
9318 self.dest_disk_info = None
9319 self.dest_x509_ca = None
9321 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9322 self.dst_node = None
9324 if len(self.op.target_node) != len(self.instance.disks):
9325 raise errors.OpPrereqError(("Received destination information for %s"
9326 " disks, but instance %s has %s disks") %
9327 (len(self.op.target_node), instance_name,
9328 len(self.instance.disks)),
9331 cds = _GetClusterDomainSecret()
9333 # Check X509 key name
9335 (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9336 except (TypeError, ValueError), err:
9337 raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9339 if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9340 raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9343 # Load and verify CA
9345 (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9346 except OpenSSL.crypto.Error, err:
9347 raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9348 (err, ), errors.ECODE_INVAL)
9350 (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9351 if errcode is not None:
9352 raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
9353 (msg, ), errors.ECODE_INVAL)
9355 self.dest_x509_ca = cert
9357 # Verify target information
9359 for idx, disk_data in enumerate(self.op.target_node):
9361 (host, port, magic) = \
9362 masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9363 except errors.GenericError, err:
9364 raise errors.OpPrereqError("Target info for disk %s: %s" %
9365 (idx, err), errors.ECODE_INVAL)
9367 disk_info.append((host, port, magic))
9369 assert len(disk_info) == len(self.op.target_node)
9370 self.dest_disk_info = disk_info
9373 raise errors.ProgrammerError("Unhandled export mode %r" %
9376 # instance disk type verification
9377 # TODO: Implement export support for file-based disks
9378 for disk in self.instance.disks:
9379 if disk.dev_type == constants.LD_FILE:
9380 raise errors.OpPrereqError("Export not supported for instances with"
9381 " file-based disks", errors.ECODE_INVAL)
9383 def _CleanupExports(self, feedback_fn):
9384 """Removes exports of current instance from all other nodes.
9386 If an instance in a cluster with nodes A..D was exported to node C, its
9387 exports will be removed from the nodes A, B and D.
9390 assert self.op.mode != constants.EXPORT_MODE_REMOTE
9392 nodelist = self.cfg.GetNodeList()
9393 nodelist.remove(self.dst_node.name)
9395 # on one-node clusters nodelist will be empty after the removal
9396 # if we proceed, the backup would be removed because OpQueryExports
9397 # substitutes an empty list with the full cluster node list.
9398 iname = self.instance.name
9400 feedback_fn("Removing old exports for instance %s" % iname)
9401 exportlist = self.rpc.call_export_list(nodelist)
9402 for node in exportlist:
9403 if exportlist[node].fail_msg:
9405 if iname in exportlist[node].payload:
9406 msg = self.rpc.call_export_remove(node, iname).fail_msg
9408 self.LogWarning("Could not remove older export for instance %s"
9409 " on node %s: %s", iname, node, msg)
9411 def Exec(self, feedback_fn):
9412 """Export an instance to an image in the cluster.
9415 assert self.op.mode in constants.EXPORT_MODES
9417 instance = self.instance
9418 src_node = instance.primary_node
9420 if self.op.shutdown:
9421 # shutdown the instance, but not the disks
9422 feedback_fn("Shutting down instance %s" % instance.name)
9423 result = self.rpc.call_instance_shutdown(src_node, instance,
9424 self.op.shutdown_timeout)
9425 # TODO: Maybe ignore failures if ignore_remove_failures is set
9426 result.Raise("Could not shutdown instance %s on"
9427 " node %s" % (instance.name, src_node))
9429 # set the disks ID correctly since call_instance_start needs the
9430 # correct drbd minor to create the symlinks
9431 for disk in instance.disks:
9432 self.cfg.SetDiskID(disk, src_node)
9434 activate_disks = (not instance.admin_up)
9436 if activate_disks:
9437 # Activate the instance disks if we're exporting a stopped instance
9438 feedback_fn("Activating disks for %s" % instance.name)
9439 _StartInstanceDisks(self, instance, None)
9441 try:
9442 helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9443 instance)
9445 helper.CreateSnapshots()
9446 try:
9447 if (self.op.shutdown and instance.admin_up and
9448 not self.op.remove_instance):
9449 assert not activate_disks
9450 feedback_fn("Starting instance %s" % instance.name)
9451 result = self.rpc.call_instance_start(src_node, instance, None, None)
9452 msg = result.fail_msg
9453 if msg:
9454 feedback_fn("Failed to start instance: %s" % msg)
9455 _ShutdownInstanceDisks(self, instance)
9456 raise errors.OpExecError("Could not start instance: %s" % msg)
9458 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9459 (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9460 elif self.op.mode == constants.EXPORT_MODE_REMOTE:
9461 connect_timeout = constants.RIE_CONNECT_TIMEOUT
9462 timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9464 (key_name, _, _) = self.x509_key_name
9466 dest_ca_pem = \
9467 OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9468 self.dest_x509_ca)
9470 (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9471 key_name, dest_ca_pem,
9472 timeouts)
9473 finally:
9474 helper.Cleanup()
9476 # Check for backwards compatibility
9477 assert len(dresults) == len(instance.disks)
9478 assert compat.all(isinstance(i, bool) for i in dresults), \
9479 "Not all results are boolean: %r" % dresults
9481 finally:
9482 if activate_disks:
9483 feedback_fn("Deactivating disks for %s" % instance.name)
9484 _ShutdownInstanceDisks(self, instance)
9486 # Remove instance if requested
9487 if self.op.remove_instance:
9488 if not (compat.all(dresults) and fin_resu):
9489 feedback_fn("Not removing instance %s as parts of the export failed" %
9490 instance.name)
9491 else:
9492 feedback_fn("Removing instance %s" % instance.name)
9493 _RemoveInstance(self, feedback_fn, instance,
9494 self.op.ignore_remove_failures)
9496 if self.op.mode == constants.EXPORT_MODE_LOCAL:
9497 self._CleanupExports(feedback_fn)
9499 return fin_resu, dresults
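
# Illustrative sketch (not part of the original LUs): how the remote-export
# destination information is checked in LUExportInstance.CheckPrereq above.
# The opcode carries a signed X509 key name and one opaque blob per disk that
# CheckRemoteExportDiskInfo turns into a (host, port, magic) tuple. The helper
# name and its standalone form are hypothetical; it only mirrors the calls
# already used above.
def _ExampleVerifyRemoteExportInfo(cds, x509_key_name, disk_data_list):
  """Hypothetical example mirroring the remote-export checks.

  @param cds: the cluster domain secret (see _GetClusterDomainSecret)
  @param x509_key_name: a (key_name, hmac_digest, hmac_salt) tuple
  @param disk_data_list: per-disk destination blobs from the opcode

  """
  (key_name, hmac_digest, hmac_salt) = x509_key_name
  # the key name must be authenticated with the cluster domain secret
  if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
    raise errors.OpPrereqError("HMAC for X509 key name is wrong",
                               errors.ECODE_INVAL)
  # each disk blob decodes to the (host, port, magic) connection endpoint
  disk_info = []
  for idx, disk_data in enumerate(disk_data_list):
    (host, port, magic) = \
      masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
    disk_info.append((host, port, magic))
  return disk_info
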
9502 class LURemoveExport(NoHooksLU):
9503 """Remove exports related to the named instance.
9506 _OP_REQP = ["instance_name"]
9509 def ExpandNames(self):
9510 self.needed_locks = {}
9511 # We need all nodes to be locked in order for RemoveExport to work, but we
9512 # don't need to lock the instance itself, as nothing will happen to it (and
9513 # we can remove exports also for a removed instance)
9514 self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9516 def CheckPrereq(self):
9517 """Check prerequisites.
9521 def Exec(self, feedback_fn):
9522 """Remove any export.
9525 instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9526 # If the instance was not found we'll try with the name that was passed in.
9527 # This will only work if it was an FQDN, though.
9528 fqdn_warn = False
9529 if not instance_name:
9530 fqdn_warn = True
9531 instance_name = self.op.instance_name
9533 locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9534 exportlist = self.rpc.call_export_list(locked_nodes)
9535 found = False
9536 for node in exportlist:
9537 msg = exportlist[node].fail_msg
9538 if msg:
9539 self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9540 continue
9541 if instance_name in exportlist[node].payload:
9542 found = True
9543 result = self.rpc.call_export_remove(node, instance_name)
9544 msg = result.fail_msg
9545 if msg:
9546 logging.error("Could not remove export for instance %s"
9547 " on node %s: %s", instance_name, node, msg)
9549 if fqdn_warn and not found:
9550 feedback_fn("Export not found. If trying to remove an export belonging"
9551 " to a deleted instance please use its Fully Qualified"
9555 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
9556 """Generic tags LU.
9558 This is an abstract class which is the parent of all the other tags LUs.
9562 def ExpandNames(self):
9563 self.needed_locks = {}
9564 if self.op.kind == constants.TAG_NODE:
9565 self.op.name = _ExpandNodeName(self.cfg, self.op.name)
9566 self.needed_locks[locking.LEVEL_NODE] = self.op.name
9567 elif self.op.kind == constants.TAG_INSTANCE:
9568 self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
9569 self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
9571 def CheckPrereq(self):
9572 """Check prerequisites.
9575 if self.op.kind == constants.TAG_CLUSTER:
9576 self.target = self.cfg.GetClusterInfo()
9577 elif self.op.kind == constants.TAG_NODE:
9578 self.target = self.cfg.GetNodeInfo(self.op.name)
9579 elif self.op.kind == constants.TAG_INSTANCE:
9580 self.target = self.cfg.GetInstanceInfo(self.op.name)
9581 else:
9582 raise errors.OpPrereqError("Wrong tag type requested (%s)" %
9583 str(self.op.kind), errors.ECODE_INVAL)
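
# Note (illustrative): TagsLU centralizes the kind/name handling so that the
# concrete tag LUs below mostly differ only in their Exec methods. Based on
# the code above:
#
#   kind == TAG_CLUSTER  -> target is the cluster config object, no locks
#   kind == TAG_NODE     -> name expanded, LEVEL_NODE lock on that node only
#   kind == TAG_INSTANCE -> name expanded, LEVEL_INSTANCE lock on that instance
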
9586 class LUGetTags(TagsLU):
9587 """Returns the tags of a given object.
9590 _OP_REQP = ["kind", "name"]
9593 def Exec(self, feedback_fn):
9594 """Returns the tag list.
9597 return list(self.target.GetTags())
9600 class LUSearchTags(NoHooksLU):
9601 """Searches the tags for a given pattern.
9604 _OP_REQP = ["pattern"]
9607 def ExpandNames(self):
9608 self.needed_locks = {}
9610 def CheckPrereq(self):
9611 """Check prerequisites.
9613 This checks the pattern passed for validity by compiling it.
9616 try:
9617 self.re = re.compile(self.op.pattern)
9618 except re.error, err:
9619 raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9620 (self.op.pattern, err), errors.ECODE_INVAL)
9622 def Exec(self, feedback_fn):
9623 """Returns the tag list.
9626 cfg = self.cfg
9627 tgts = [("/cluster", cfg.GetClusterInfo())]
9628 ilist = cfg.GetAllInstancesInfo().values()
9629 tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9630 nlist = cfg.GetAllNodesInfo().values()
9631 tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9632 results = []
9633 for path, target in tgts:
9634 for tag in target.GetTags():
9635 if self.re.search(tag):
9636 results.append((path, tag))
9637 return results
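
# Illustrative sketch (hypothetical helper, not used by the LUs): the tag
# search above walks (path, object) pairs and collects every tag matching a
# compiled regular expression. The same pattern, reduced to plain data:
def _ExampleSearchTags(pattern, tagged_objects):
  """Return [(path, tag)] for every tag matching the given pattern.

  @param pattern: an uncompiled regular expression string
  @param tagged_objects: iterable of (path, tag iterable) pairs

  """
  regex = re.compile(pattern)
  results = []
  for path, tags in tagged_objects:
    for tag in tags:
      if regex.search(tag):
        results.append((path, tag))
  return results
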
9640 class LUAddTags(TagsLU):
9641 """Sets a tag on a given object.
9644 _OP_REQP = ["kind", "name", "tags"]
9647 def CheckPrereq(self):
9648 """Check prerequisites.
9650 This checks the type and length of the tag name and value.
9653 TagsLU.CheckPrereq(self)
9654 for tag in self.op.tags:
9655 objects.TaggableObject.ValidateTag(tag)
9657 def Exec(self, feedback_fn):
9661 try:
9662 for tag in self.op.tags:
9663 self.target.AddTag(tag)
9664 except errors.TagError, err:
9665 raise errors.OpExecError("Error while setting tag: %s" % str(err))
9666 self.cfg.Update(self.target, feedback_fn)
9669 class LUDelTags(TagsLU):
9670 """Delete a list of tags from a given object.
9673 _OP_REQP = ["kind", "name", "tags"]
9676 def CheckPrereq(self):
9677 """Check prerequisites.
9679 This checks that we have the given tag.
9682 TagsLU.CheckPrereq(self)
9683 for tag in self.op.tags:
9684 objects.TaggableObject.ValidateTag(tag)
9685 del_tags = frozenset(self.op.tags)
9686 cur_tags = self.target.GetTags()
9687 if not del_tags <= cur_tags:
9688 diff_tags = del_tags - cur_tags
9689 diff_names = ["'%s'" % tag for tag in diff_tags]
9691 raise errors.OpPrereqError("Tag(s) %s not found" %
9692 (",".join(diff_names)), errors.ECODE_NOENT)
9694 def Exec(self, feedback_fn):
9695 """Remove the tag from the object.
9698 for tag in self.op.tags:
9699 self.target.RemoveTag(tag)
9700 self.cfg.Update(self.target, feedback_fn)
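
# The prerequisite check in LUDelTags above relies on plain set algebra; as a
# standalone illustration with hypothetical tag names:
#
#   del_tags = frozenset(["web", "db"])
#   cur_tags = set(["web", "backup"])
#   del_tags <= cur_tags    # False: "db" is not present on the object
#   del_tags - cur_tags     # frozenset(["db"]) -> reported in the error
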
9703 class LUTestDelay(NoHooksLU):
9704 """Sleep for a specified amount of time.
9706 This LU sleeps on the master and/or nodes for a specified amount of
9707 time.
9710 _OP_REQP = ["duration", "on_master", "on_nodes"]
9713 def CheckArguments(self):
9714 # TODO: convert to the type system
9715 self.op.repeat = getattr(self.op, "repeat", 0)
9716 if self.op.repeat < 0:
9717 raise errors.OpPrereqError("Repetition count cannot be negative")
9719 def ExpandNames(self):
9720 """Expand names and set required locks.
9722 This expands the node list, if any.
9725 self.needed_locks = {}
9726 if self.op.on_nodes:
9727 # _GetWantedNodes can be used here, but is not always appropriate to use
9728 # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
9729 # more information.
9730 self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
9731 self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
9733 def CheckPrereq(self):
9734 """Check prerequisites.
9738 def _TestDelay(self):
9739 """Do the actual sleep.
9742 if self.op.on_master:
9743 if not utils.TestDelay(self.op.duration):
9744 raise errors.OpExecError("Error during master delay test")
9745 if self.op.on_nodes:
9746 result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
9747 for node, node_result in result.items():
9748 node_result.Raise("Failure during rpc call to node %s" % node)
9750 def Exec(self, feedback_fn):
9751 """Execute the test delay opcode, with the wanted repetitions.
9754 if self.op.repeat == 0:
9755 self._TestDelay()
9756 else:
9757 top_value = self.op.repeat - 1
9758 for i in range(self.op.repeat):
9759 self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
9760 self._TestDelay()
9763 class IAllocator(object):
9764 """IAllocator framework.
9766 An IAllocator instance has four sets of attributes:
9767 - cfg that is needed to query the cluster
9768 - input data (all members of the _KEYS class attribute are required)
9769 - four buffer attributes (in|out_data|text), that represent the
9770 input (to the external script) in text and data structure format,
9771 and the output from it, again in two formats
9772 - the result variables from the script (success, info, result) for
9773 easy usage
9776 # pylint: disable-msg=R0902
9777 # lots of instance attributes
9779 "name", "mem_size", "disks", "disk_template",
9780 "os", "tags", "nics", "vcpus", "hypervisor",
9783 "name", "relocate_from",
9789 def __init__(self, cfg, rpc, mode, **kwargs):
9790 self.cfg = cfg
9791 self.rpc = rpc
9792 # init buffer variables
9793 self.in_text = self.out_text = self.in_data = self.out_data = None
9794 # init all input fields so that pylint is happy
9795 self.mode = mode
9796 self.mem_size = self.disks = self.disk_template = None
9797 self.os = self.tags = self.nics = self.vcpus = None
9798 self.hypervisor = None
9799 self.relocate_from = None
9800 self.name = None
9801 self.evac_nodes = None
9803 self.required_nodes = None
9804 # init result fields
9805 self.success = self.info = self.result = None
9806 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9807 keyset = self._ALLO_KEYS
9808 fn = self._AddNewInstance
9809 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9810 keyset = self._RELO_KEYS
9811 fn = self._AddRelocateInstance
9812 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9813 keyset = self._EVAC_KEYS
9814 fn = self._AddEvacuateNodes
9815 else:
9816 raise errors.ProgrammerError("Unknown mode '%s' passed to the"
9817 " IAllocator" % self.mode)
9818 for key in kwargs:
9819 if key not in keyset:
9820 raise errors.ProgrammerError("Invalid input parameter '%s' to"
9821 " IAllocator" % key)
9822 setattr(self, key, kwargs[key])
9824 for key in keyset:
9825 if key not in kwargs:
9826 raise errors.ProgrammerError("Missing input parameter '%s' to"
9827 " IAllocator" % key)
9828 self._BuildInputData(fn)
9830 def _ComputeClusterData(self):
9831 """Compute the generic allocator input data.
9833 This is the data that is independent of the actual operation.
9836 cfg = self.cfg
9837 cluster_info = cfg.GetClusterInfo()
9840 "version": constants.IALLOCATOR_VERSION,
9841 "cluster_name": cfg.GetClusterName(),
9842 "cluster_tags": list(cluster_info.GetTags()),
9843 "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
9844 # we don't have job IDs
9845 }
9846 iinfo = cfg.GetAllInstancesInfo().values()
9847 i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
9849 # node data
9850 node_results = {}
9851 node_list = cfg.GetNodeList()
9853 if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9854 hypervisor_name = self.hypervisor
9855 elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9856 hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
9857 elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9858 hypervisor_name = cluster_info.enabled_hypervisors[0]
9860 node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
9861 hypervisor_name)
9862 node_iinfo = \
9863 self.rpc.call_all_instances_info(node_list,
9864 cluster_info.enabled_hypervisors)
9865 for nname, nresult in node_data.items():
9866 # first fill in static (config-based) values
9867 ninfo = cfg.GetNodeInfo(nname)
9869 "tags": list(ninfo.GetTags()),
9870 "primary_ip": ninfo.primary_ip,
9871 "secondary_ip": ninfo.secondary_ip,
9872 "offline": ninfo.offline,
9873 "drained": ninfo.drained,
9874 "master_candidate": ninfo.master_candidate,
9877 if not (ninfo.offline or ninfo.drained):
9878 nresult.Raise("Can't get data for node %s" % nname)
9879 node_iinfo[nname].Raise("Can't get node instance info from node %s" %
9880 nname)
9881 remote_info = nresult.payload
9883 for attr in ['memory_total', 'memory_free', 'memory_dom0',
9884 'vg_size', 'vg_free', 'cpu_total']:
9885 if attr not in remote_info:
9886 raise errors.OpExecError("Node '%s' didn't return attribute"
9887 " '%s'" % (nname, attr))
9888 if not isinstance(remote_info[attr], int):
9889 raise errors.OpExecError("Node '%s' returned invalid value"
9890 " for '%s': %s" %
9891 (nname, attr, remote_info[attr]))
9892 # compute memory used by primary instances
9893 i_p_mem = i_p_up_mem = 0
9894 for iinfo, beinfo in i_list:
9895 if iinfo.primary_node == nname:
9896 i_p_mem += beinfo[constants.BE_MEMORY]
9897 if iinfo.name not in node_iinfo[nname].payload:
9898 i_used_mem = 0
9899 else:
9900 i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
9901 i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
9902 remote_info['memory_free'] -= max(0, i_mem_diff)
9904 if iinfo.admin_up:
9905 i_p_up_mem += beinfo[constants.BE_MEMORY]
9907 # compute memory used by instances
9909 "total_memory": remote_info['memory_total'],
9910 "reserved_memory": remote_info['memory_dom0'],
9911 "free_memory": remote_info['memory_free'],
9912 "total_disk": remote_info['vg_size'],
9913 "free_disk": remote_info['vg_free'],
9914 "total_cpus": remote_info['cpu_total'],
9915 "i_pri_memory": i_p_mem,
9916 "i_pri_up_memory": i_p_up_mem,
9920 node_results[nname] = pnr
9921 data["nodes"] = node_results
9923 # instance data
9924 instance_data = {}
9925 for iinfo, beinfo in i_list:
9926 nic_data = []
9927 for nic in iinfo.nics:
9928 filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
9929 nic_dict = {"mac": nic.mac,
9931 "mode": filled_params[constants.NIC_MODE],
9932 "link": filled_params[constants.NIC_LINK],
9934 if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
9935 nic_dict["bridge"] = filled_params[constants.NIC_LINK]
9936 nic_data.append(nic_dict)
9938 "tags": list(iinfo.GetTags()),
9939 "admin_up": iinfo.admin_up,
9940 "vcpus": beinfo[constants.BE_VCPUS],
9941 "memory": beinfo[constants.BE_MEMORY],
9943 "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
9945 "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
9946 "disk_template": iinfo.disk_template,
9947 "hypervisor": iinfo.hypervisor,
9949 pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
9950 pir["disks"])
9951 instance_data[iinfo.name] = pir
9953 data["instances"] = instance_data
9955 self.in_data = data
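
  # Illustrative sketch (field values hypothetical): the dictionary assembled
  # above and stored in self.in_data looks roughly like
  #
  #   {"version": constants.IALLOCATOR_VERSION,
  #    "cluster_name": "cluster.example.com",
  #    "cluster_tags": [...],
  #    "enabled_hypervisors": [...],
  #    "nodes": {"node1.example.com": {"total_memory": 4096,
  #                                    "free_memory": 1024, ...}, ...},
  #    "instances": {"inst1.example.com": {"memory": 512, "vcpus": 1,
  #                                        "disk_template": "drbd", ...}, ...}}
  #
  # The mode-specific "request" key is added later by _BuildInputData.
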
9957 def _AddNewInstance(self):
9958 """Add new instance data to allocator structure.
9960 This in combination with _ComputeClusterData will create the
9961 correct structure needed as input for the allocator.
9963 The checks for the completeness of the opcode must have already been
9964 done.
9967 disk_space = _ComputeDiskSize(self.disk_template, self.disks)
9969 if self.disk_template in constants.DTS_NET_MIRROR:
9970 self.required_nodes = 2
9971 else:
9972 self.required_nodes = 1
9975 "disk_template": self.disk_template,
9978 "vcpus": self.vcpus,
9979 "memory": self.mem_size,
9980 "disks": self.disks,
9981 "disk_space_total": disk_space,
9983 "required_nodes": self.required_nodes,
9987 def _AddRelocateInstance(self):
9988 """Add relocate instance data to allocator structure.
9990 This in combination with _ComputeClusterData will create the
9991 correct structure needed as input for the allocator.
9993 The checks for the completeness of the opcode must have already been
9994 done.
9997 instance = self.cfg.GetInstanceInfo(self.name)
9998 if instance is None:
9999 raise errors.ProgrammerError("Unknown instance '%s' passed to"
10000 " IAllocator" % self.name)
10002 if instance.disk_template not in constants.DTS_NET_MIRROR:
10003 raise errors.OpPrereqError("Can't relocate non-mirrored instances",
10004 errors.ECODE_INVAL)
10006 if len(instance.secondary_nodes) != 1:
10007 raise errors.OpPrereqError("Instance does not have exactly one"
10008 " secondary node", errors.ECODE_STATE)
10010 self.required_nodes = 1
10011 disk_sizes = [{'size': disk.size} for disk in instance.disks]
10012 disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
10016 "disk_space_total": disk_space,
10017 "required_nodes": self.required_nodes,
10018 "relocate_from": self.relocate_from,
10022 def _AddEvacuateNodes(self):
10023 """Add evacuate nodes data to allocator structure.
10027 "evac_nodes": self.evac_nodes
10031 def _BuildInputData(self, fn):
10032 """Build input data structures.
10035 self._ComputeClusterData()
10037 request = fn()
10038 request["type"] = self.mode
10039 self.in_data["request"] = request
10041 self.in_text = serializer.Dump(self.in_data)
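
  # Illustrative sketch (values hypothetical): after _BuildInputData runs,
  # self.in_text is the serialized cluster data with the mode-specific request
  # merged in, e.g. for an allocation roughly
  #
  #   {..., "request": {"type": <IALLOCATOR_MODE_ALLOC>,
  #                     "name": "inst1.example.com",
  #                     "memory": 512, "disks": [{"size": 1024, "mode": "w"}],
  #                     "required_nodes": 2, ...}}
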
10043 def Run(self, name, validate=True, call_fn=None):
10044 """Run an instance allocator and return the results.
10047 if call_fn is None:
10048 call_fn = self.rpc.call_iallocator_runner
10050 result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
10051 result.Raise("Failure while running the iallocator script")
10053 self.out_text = result.payload
10054 if validate:
10055 self._ValidateResult()
10057 def _ValidateResult(self):
10058 """Process the allocator results.
10060 This will process and if successful save the result in
10061 self.out_data and the other parameters.
10064 try:
10065 rdict = serializer.Load(self.out_text)
10066 except Exception, err:
10067 raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
10069 if not isinstance(rdict, dict):
10070 raise errors.OpExecError("Can't parse iallocator results: not a dict")
10072 # TODO: remove backwards compatibility in later versions
10073 if "nodes" in rdict and "result" not in rdict:
10074 rdict["result"] = rdict["nodes"]
10075 del rdict["nodes"]
10077 for key in "success", "info", "result":
10078 if key not in rdict:
10079 raise errors.OpExecError("Can't parse iallocator results:"
10080 " missing key '%s'" % key)
10081 setattr(self, key, rdict[key])
10083 if not isinstance(rdict["result"], list):
10084 raise errors.OpExecError("Can't parse iallocator results: 'result' key"
10085 " is not a list")
10086 self.out_data = rdict
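
# Illustrative sketch (hypothetical helper, not used anywhere): a well-formed
# iallocator reply, as expected by IAllocator._ValidateResult above, is a
# serialized dictionary carrying the mandatory "success", "info" and "result"
# keys; legacy scripts may return the node list under "nodes" instead.
def _ExampleIAllocatorReply(chosen_nodes):
  """Build a reply document in the format _ValidateResult accepts.

  """
  return serializer.Dump({
    "success": True,
    "info": "allocation successful",
    "result": list(chosen_nodes),
    })
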
10089 class LUTestAllocator(NoHooksLU):
10090 """Run allocator tests.
10092 This LU runs the allocator tests
10095 _OP_REQP = ["direction", "mode", "name"]
10097 ("hypervisor", None),
10098 ("allocator", None),
10101 def CheckPrereq(self):
10102 """Check prerequisites.
10104 This checks the opcode parameters depending on the direction and mode test.
10107 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10108 for attr in ["name", "mem_size", "disks", "disk_template",
10109 "os", "tags", "nics", "vcpus"]:
10110 if not hasattr(self.op, attr):
10111 raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
10112 attr, errors.ECODE_INVAL)
10113 iname = self.cfg.ExpandInstanceName(self.op.name)
10114 if iname is not None:
10115 raise errors.OpPrereqError("Instance '%s' already in the cluster" %
10116 iname, errors.ECODE_EXISTS)
10117 if not isinstance(self.op.nics, list):
10118 raise errors.OpPrereqError("Invalid parameter 'nics'",
10119 errors.ECODE_INVAL)
10120 for row in self.op.nics:
10121 if (not isinstance(row, dict) or
10122 "mac" not in row or
10124 "bridge" not in row):
10125 raise errors.OpPrereqError("Invalid contents of the 'nics'"
10126 " parameter", errors.ECODE_INVAL)
10127 if not isinstance(self.op.disks, list):
10128 raise errors.OpPrereqError("Invalid parameter 'disks'",
10129 errors.ECODE_INVAL)
10130 for row in self.op.disks:
10131 if (not isinstance(row, dict) or
10132 "size" not in row or
10133 not isinstance(row["size"], int) or
10134 "mode" not in row or
10135 row["mode"] not in ['r', 'w']):
10136 raise errors.OpPrereqError("Invalid contents of the 'disks'"
10137 " parameter", errors.ECODE_INVAL)
10138 if self.op.hypervisor is None:
10139 self.op.hypervisor = self.cfg.GetHypervisorType()
10140 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10141 if not hasattr(self.op, "name"):
10142 raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
10143 errors.ECODE_INVAL)
10144 fname = _ExpandInstanceName(self.cfg, self.op.name)
10145 self.op.name = fname
10146 self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
10147 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10148 if not hasattr(self.op, "evac_nodes"):
10149 raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
10150 " opcode input", errors.ECODE_INVAL)
10151 else:
10152 raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
10153 self.op.mode, errors.ECODE_INVAL)
10155 if self.op.direction == constants.IALLOCATOR_DIR_OUT:
10156 if self.op.allocator is None:
10157 raise errors.OpPrereqError("Missing allocator name",
10158 errors.ECODE_INVAL)
10159 elif self.op.direction != constants.IALLOCATOR_DIR_IN:
10160 raise errors.OpPrereqError("Wrong allocator test '%s'" %
10161 self.op.direction, errors.ECODE_INVAL)
10163 def Exec(self, feedback_fn):
10164 """Run the allocator test.
10167 if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10168 ial = IAllocator(self.cfg, self.rpc,
10169 mode=self.op.mode,
10170 name=self.op.name,
10171 mem_size=self.op.mem_size,
10172 disks=self.op.disks,
10173 disk_template=self.op.disk_template,
10174 os=self.op.os,
10175 tags=self.op.tags,
10176 nics=self.op.nics,
10177 vcpus=self.op.vcpus,
10178 hypervisor=self.op.hypervisor,
10179 )
10180 elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10181 ial = IAllocator(self.cfg, self.rpc,
10182 mode=self.op.mode,
10183 name=self.op.name,
10184 relocate_from=list(self.relocate_from),
10185 )
10186 elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10187 ial = IAllocator(self.cfg, self.rpc,
10188 mode=self.op.mode,
10189 evac_nodes=self.op.evac_nodes)
10190 else:
10191 raise errors.ProgrammerError("Uncaught mode %s in"
10192 " LUTestAllocator.Exec", self.op.mode)
10194 if self.op.direction == constants.IALLOCATOR_DIR_IN:
10195 result = ial.in_text
10196 else:
10197 ial.Run(self.op.allocator, validate=False)
10198 result = ial.out_text
10199 return result
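
# Note (illustrative): with direction IALLOCATOR_DIR_IN the LU above only
# returns the generated allocator input text, which is useful for inspecting
# what an external script would receive; otherwise it runs the named allocator
# with validate=False, so even a malformed reply is returned verbatim as the
# opcode result.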