code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable-msg=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay to many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import platform
  36 import logging
  37 import copy
  38 import OpenSSL
  39
  40 from ganeti import ssh
  41 from ganeti import utils
  42 from ganeti import errors
  43 from ganeti import hypervisor
  44 from ganeti import locking
  45 from ganeti import constants
  46 from ganeti import objects
  47 from ganeti import serializer
  48 from ganeti import ssconf
  49 from ganeti import uidpool
  50 from ganeti import compat
  51 from ganeti import masterd
  52
  53 import ganeti.masterd.instance # pylint: disable-msg=W0611
  54
  55
  56 class LogicalUnit(object):
  57   """Logical Unit base class.
  58
  59   Subclasses must follow these rules:
  60     - implement ExpandNames
  61     - implement CheckPrereq (except when tasklets are used)
  62     - implement Exec (except when tasklets are used)
  63     - implement BuildHooksEnv
  64     - redefine HPATH and HTYPE
  65     - optionally redefine their run requirements:
  66         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
  67
  68   Note that all commands require root permissions.
  69
  70   @ivar dry_run_result: the value (if any) that will be returned to the caller
  71       in dry-run mode (signalled by opcode dry_run parameter)
  72
  73   """
  74   HPATH = None
  75   HTYPE = None
  76   _OP_REQP = []
  77   REQ_BGL = True
  78
  79   def __init__(self, processor, op, context, rpc):
  80     """Constructor for LogicalUnit.
  81
  82     This needs to be overridden in derived classes in order to check op
  83     validity.
  84
  85     """
  86     self.proc = processor
  87     self.op = op
  88     self.cfg = context.cfg
  89     self.context = context
  90     self.rpc = rpc
  91     # Dicts used to declare locking needs to mcpu
  92     self.needed_locks = None
  93     self.acquired_locks = {}
  94     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
  95     self.add_locks = {}
  96     self.remove_locks = {}
  97     # Used to force good behavior when calling helper functions
  98     self.recalculate_locks = {}
  99     self.__ssh = None
 100     # logging
 101     self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
 102     self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
 103     self.LogStep = processor.LogStep # pylint: disable-msg=C0103
 104     # support for dry-run
 105     self.dry_run_result = None
 106     # support for generic debug attribute
 107     if (not hasattr(self.op, "debug_level") or
 108         not isinstance(self.op.debug_level, int)):
 109       self.op.debug_level = 0
 110
 111     # Tasklets
 112     self.tasklets = None
 113
 114     for attr_name in self._OP_REQP:
 115       attr_val = getattr(op, attr_name, None)
 116       if attr_val is None:
 117         raise errors.OpPrereqError("Required parameter '%s' missing" %
 118                                    attr_name, errors.ECODE_INVAL)
 119
 120     self.CheckArguments()
 121
 122   def __GetSSH(self):
 123     """Returns the SshRunner object
 124
 125     """
 126     if not self.__ssh:
 127       self.__ssh = ssh.SshRunner(self.cfg.GetClusterName())
 128     return self.__ssh
 129
 130   ssh = property(fget=__GetSSH)
 131
 132   def CheckArguments(self):
 133     """Check syntactic validity for the opcode arguments.
 134
 135     This method is for doing a simple syntactic check and ensure
 136     validity of opcode parameters, without any cluster-related
 137     checks. While the same can be accomplished in ExpandNames and/or
 138     CheckPrereq, doing these separate is better because:
 139
 140       - ExpandNames is left as as purely a lock-related function
 141       - CheckPrereq is run after we have acquired locks (and possible
 142         waited for them)
 143
 144     The function is allowed to change the self.op attribute so that
 145     later methods can no longer worry about missing parameters.
 146
 147     """
 148     pass
 149
 150   def ExpandNames(self):
 151     """Expand names for this LU.
 152
 153     This method is called before starting to execute the opcode, and it should
 154     update all the parameters of the opcode to their canonical form (e.g. a
 155     short node name must be fully expanded after this method has successfully
 156     completed). This way locking, hooks, logging, ecc. can work correctly.
 157
 158     LUs which implement this method must also populate the self.needed_locks
 159     member, as a dict with lock levels as keys, and a list of needed lock names
 160     as values. Rules:
 161
 162       - use an empty dict if you don't need any lock
 163       - if you don't need any lock at a particular level omit that level
 164       - don't put anything for the BGL level
 165       - if you want all locks at a level use locking.ALL_SET as a value
 166
 167     If you need to share locks (rather than acquire them exclusively) at one
 168     level you can modify self.share_locks, setting a true value (usually 1) for
 169     that level. By default locks are not shared.
 170
 171     This function can also define a list of tasklets, which then will be
 172     executed in order instead of the usual LU-level CheckPrereq and Exec
 173     functions, if those are not defined by the LU.
 174
 175     Examples::
 176
 177       # Acquire all nodes and one instance
 178       self.needed_locks = {
 179         locking.LEVEL_NODE: locking.ALL_SET,
 180         locking.LEVEL_INSTANCE: ['instance1.example.tld'],
 181       }
 182       # Acquire just two nodes
 183       self.needed_locks = {
 184         locking.LEVEL_NODE: ['node1.example.tld', 'node2.example.tld'],
 185       }
 186       # Acquire no locks
 187       self.needed_locks = {} # No, you can't leave it to the default value None
 188
 189     """
 190     # The implementation of this method is mandatory only if the new LU is
 191     # concurrent, so that old LUs don't need to be changed all at the same
 192     # time.
 193     if self.REQ_BGL:
 194       self.needed_locks = {} # Exclusive LUs don't need locks.
 195     else:
 196       raise NotImplementedError
 197
 198   def DeclareLocks(self, level):
 199     """Declare LU locking needs for a level
 200
 201     While most LUs can just declare their locking needs at ExpandNames time,
 202     sometimes there's the need to calculate some locks after having acquired
 203     the ones before. This function is called just before acquiring locks at a
 204     particular level, but after acquiring the ones at lower levels, and permits
 205     such calculations. It can be used to modify self.needed_locks, and by
 206     default it does nothing.
 207
 208     This function is only called if you have something already set in
 209     self.needed_locks for the level.
 210
 211     @param level: Locking level which is going to be locked
 212     @type level: member of ganeti.locking.LEVELS
 213
 214     """
 215
 216   def CheckPrereq(self):
 217     """Check prerequisites for this LU.
 218
 219     This method should check that the prerequisites for the execution
 220     of this LU are fulfilled. It can do internode communication, but
 221     it should be idempotent - no cluster or system changes are
 222     allowed.
 223
 224     The method should raise errors.OpPrereqError in case something is
 225     not fulfilled. Its return value is ignored.
 226
 227     This method should also update all the parameters of the opcode to
 228     their canonical form if it hasn't been done by ExpandNames before.
 229
 230     """
 231     if self.tasklets is not None:
 232       for (idx, tl) in enumerate(self.tasklets):
 233         logging.debug("Checking prerequisites for tasklet %s/%s",
 234                       idx + 1, len(self.tasklets))
 235         tl.CheckPrereq()
 236     else:
 237       raise NotImplementedError
 238
 239   def Exec(self, feedback_fn):
 240     """Execute the LU.
 241
 242     This method should implement the actual work. It should raise
 243     errors.OpExecError for failures that are somewhat dealt with in
 244     code, or expected.
 245
 246     """
 247     if self.tasklets is not None:
 248       for (idx, tl) in enumerate(self.tasklets):
 249         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 250         tl.Exec(feedback_fn)
 251     else:
 252       raise NotImplementedError
 253
 254   def BuildHooksEnv(self):
 255     """Build hooks environment for this LU.
 256
 257     This method should return a three-node tuple consisting of: a dict
 258     containing the environment that will be used for running the
 259     specific hook for this LU, a list of node names on which the hook
 260     should run before the execution, and a list of node names on which
 261     the hook should run after the execution.
 262
 263     The keys of the dict must not have 'GANETI_' prefixed as this will
 264     be handled in the hooks runner. Also note additional keys will be
 265     added by the hooks runner. If the LU doesn't define any
 266     environment, an empty dict (and not None) should be returned.
 267
 268     No nodes should be returned as an empty list (and not None).
 269
 270     Note that if the HPATH for a LU class is None, this function will
 271     not be called.
 272
 273     """
 274     raise NotImplementedError
 275
 276   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 277     """Notify the LU about the results of its hooks.
 278
 279     This method is called every time a hooks phase is executed, and notifies
 280     the Logical Unit about the hooks' result. The LU can then use it to alter
 281     its result based on the hooks.  By default the method does nothing and the
 282     previous result is passed back unchanged but any LU can define it if it
 283     wants to use the local cluster hook-scripts somehow.
 284
 285     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 286         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 287     @param hook_results: the results of the multi-node hooks rpc call
 288     @param feedback_fn: function used send feedback back to the caller
 289     @param lu_result: the previous Exec result this LU had, or None
 290         in the PRE phase
 291     @return: the new Exec result, based on the previous result
 292         and hook results
 293
 294     """
 295     # API must be kept, thus we ignore the unused argument and could
 296     # be a function warnings
 297     # pylint: disable-msg=W0613,R0201
 298     return lu_result
 299
 300   def _ExpandAndLockInstance(self):
 301     """Helper function to expand and lock an instance.
 302
 303     Many LUs that work on an instance take its name in self.op.instance_name
 304     and need to expand it and then declare the expanded name for locking. This
 305     function does it, and then updates self.op.instance_name to the expanded
 306     name. It also initializes needed_locks as a dict, if this hasn't been done
 307     before.
 308
 309     """
 310     if self.needed_locks is None:
 311       self.needed_locks = {}
 312     else:
 313       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 314         "_ExpandAndLockInstance called with instance-level locks set"
 315     self.op.instance_name = _ExpandInstanceName(self.cfg,
 316                                                 self.op.instance_name)
 317     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 318
 319   def _LockInstancesNodes(self, primary_only=False):
 320     """Helper function to declare instances' nodes for locking.
 321
 322     This function should be called after locking one or more instances to lock
 323     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 324     with all primary or secondary nodes for instances already locked and
 325     present in self.needed_locks[locking.LEVEL_INSTANCE].
 326
 327     It should be called from DeclareLocks, and for safety only works if
 328     self.recalculate_locks[locking.LEVEL_NODE] is set.
 329
 330     In the future it may grow parameters to just lock some instance's nodes, or
 331     to just lock primaries or secondary nodes, if needed.
 332
 333     If should be called in DeclareLocks in a way similar to::
 334
 335       if level == locking.LEVEL_NODE:
 336         self._LockInstancesNodes()
 337
 338     @type primary_only: boolean
 339     @param primary_only: only lock primary nodes of locked instances
 340
 341     """
 342     assert locking.LEVEL_NODE in self.recalculate_locks, \
 343       "_LockInstancesNodes helper function called with no nodes to recalculate"
 344
 345     # TODO: check if we're really been called with the instance locks held
 346
 347     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 348     # future we might want to have different behaviors depending on the value
 349     # of self.recalculate_locks[locking.LEVEL_NODE]
 350     wanted_nodes = []
 351     for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
 352       instance = self.context.cfg.GetInstanceInfo(instance_name)
 353       wanted_nodes.append(instance.primary_node)
 354       if not primary_only:
 355         wanted_nodes.extend(instance.secondary_nodes)
 356
 357     if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
 358       self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
 359     elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
 360       self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
 361
 362     del self.recalculate_locks[locking.LEVEL_NODE]
 363
 364
 365 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
 366   """Simple LU which runs no hooks.
 367
 368   This LU is intended as a parent for other LogicalUnits which will
 369   run no hooks, in order to reduce duplicate code.
 370
 371   """
 372   HPATH = None
 373   HTYPE = None
 374
 375   def BuildHooksEnv(self):
 376     """Empty BuildHooksEnv for NoHooksLu.
 377
 378     This just raises an error.
 379
 380     """
 381     assert False, "BuildHooksEnv called for NoHooksLUs"
 382
 383
 384 class Tasklet:
 385   """Tasklet base class.
 386
 387   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 388   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 389   tasklets know nothing about locks.
 390
 391   Subclasses must follow these rules:
 392     - Implement CheckPrereq
 393     - Implement Exec
 394
 395   """
 396   def __init__(self, lu):
 397     self.lu = lu
 398
 399     # Shortcuts
 400     self.cfg = lu.cfg
 401     self.rpc = lu.rpc
 402
 403   def CheckPrereq(self):
 404     """Check prerequisites for this tasklets.
 405
 406     This method should check whether the prerequisites for the execution of
 407     this tasklet are fulfilled. It can do internode communication, but it
 408     should be idempotent - no cluster or system changes are allowed.
 409
 410     The method should raise errors.OpPrereqError in case something is not
 411     fulfilled. Its return value is ignored.
 412
 413     This method should also update all parameters to their canonical form if it
 414     hasn't been done before.
 415
 416     """
 417     raise NotImplementedError
 418
 419   def Exec(self, feedback_fn):
 420     """Execute the tasklet.
 421
 422     This method should implement the actual work. It should raise
 423     errors.OpExecError for failures that are somewhat dealt with in code, or
 424     expected.
 425
 426     """
 427     raise NotImplementedError
 428
 429
 430 def _GetWantedNodes(lu, nodes):
 431   """Returns list of checked and expanded node names.
 432
 433   @type lu: L{LogicalUnit}
 434   @param lu: the logical unit on whose behalf we execute
 435   @type nodes: list
 436   @param nodes: list of node names or None for all nodes
 437   @rtype: list
 438   @return: the list of nodes, sorted
 439   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 440
 441   """
 442   if not isinstance(nodes, list):
 443     raise errors.OpPrereqError("Invalid argument type 'nodes'",
 444                                errors.ECODE_INVAL)
 445
 446   if not nodes:
 447     raise errors.ProgrammerError("_GetWantedNodes should only be called with a"
 448       " non-empty list of nodes whose name is to be expanded.")
 449
 450   wanted = [_ExpandNodeName(lu.cfg, name) for name in nodes]
 451   return utils.NiceSort(wanted)
 452
 453
 454 def _GetWantedInstances(lu, instances):
 455   """Returns list of checked and expanded instance names.
 456
 457   @type lu: L{LogicalUnit}
 458   @param lu: the logical unit on whose behalf we execute
 459   @type instances: list
 460   @param instances: list of instance names or None for all instances
 461   @rtype: list
 462   @return: the list of instances, sorted
 463   @raise errors.OpPrereqError: if the instances parameter is wrong type
 464   @raise errors.OpPrereqError: if any of the passed instances is not found
 465
 466   """
 467   if not isinstance(instances, list):
 468     raise errors.OpPrereqError("Invalid argument type 'instances'",
 469                                errors.ECODE_INVAL)
 470
 471   if instances:
 472     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 473   else:
 474     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 475   return wanted
 476
 477
 478 def _GetUpdatedParams(old_params, update_dict,
 479                       use_default=True, use_none=False):
 480   """Return the new version of a parameter dictionary.
 481
 482   @type old_params: dict
 483   @param old_params: old parameters
 484   @type update_dict: dict
 485   @param update_dict: dict containing new parameter values, or
 486       constants.VALUE_DEFAULT to reset the parameter to its default
 487       value
 488   @param use_default: boolean
 489   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 490       values as 'to be deleted' values
 491   @param use_none: boolean
 492   @type use_none: whether to recognise C{None} values as 'to be
 493       deleted' values
 494   @rtype: dict
 495   @return: the new parameter dictionary
 496
 497   """
 498   params_copy = copy.deepcopy(old_params)
 499   for key, val in update_dict.iteritems():
 500     if ((use_default and val == constants.VALUE_DEFAULT) or
 501         (use_none and val is None)):
 502       try:
 503         del params_copy[key]
 504       except KeyError:
 505         pass
 506     else:
 507       params_copy[key] = val
 508   return params_copy
 509
 510
 511 def _CheckOutputFields(static, dynamic, selected):
 512   """Checks whether all selected fields are valid.
 513
 514   @type static: L{utils.FieldSet}
 515   @param static: static fields set
 516   @type dynamic: L{utils.FieldSet}
 517   @param dynamic: dynamic fields set
 518
 519   """
 520   f = utils.FieldSet()
 521   f.Extend(static)
 522   f.Extend(dynamic)
 523
 524   delta = f.NonMatching(selected)
 525   if delta:
 526     raise errors.OpPrereqError("Unknown output fields selected: %s"
 527                                % ",".join(delta), errors.ECODE_INVAL)
 528
 529
 530 def _CheckBooleanOpField(op, name):
 531   """Validates boolean opcode parameters.
 532
 533   This will ensure that an opcode parameter is either a boolean value,
 534   or None (but that it always exists).
 535
 536   """
 537   val = getattr(op, name, None)
 538   if not (val is None or isinstance(val, bool)):
 539     raise errors.OpPrereqError("Invalid boolean parameter '%s' (%s)" %
 540                                (name, str(val)), errors.ECODE_INVAL)
 541   setattr(op, name, val)
 542
 543
 544 def _CheckGlobalHvParams(params):
 545   """Validates that given hypervisor params are not global ones.
 546
 547   This will ensure that instances don't get customised versions of
 548   global params.
 549
 550   """
 551   used_globals = constants.HVC_GLOBALS.intersection(params)
 552   if used_globals:
 553     msg = ("The following hypervisor parameters are global and cannot"
 554            " be customized at instance level, please modify them at"
 555            " cluster level: %s" % utils.CommaJoin(used_globals))
 556     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 557
 558
 559 def _CheckNodeOnline(lu, node):
 560   """Ensure that a given node is online.
 561
 562   @param lu: the LU on behalf of which we make the check
 563   @param node: the node to check
 564   @raise errors.OpPrereqError: if the node is offline
 565
 566   """
 567   if lu.cfg.GetNodeInfo(node).offline:
 568     raise errors.OpPrereqError("Can't use offline node %s" % node,
 569                                errors.ECODE_INVAL)
 570
 571
 572 def _CheckNodeNotDrained(lu, node):
 573   """Ensure that a given node is not drained.
 574
 575   @param lu: the LU on behalf of which we make the check
 576   @param node: the node to check
 577   @raise errors.OpPrereqError: if the node is drained
 578
 579   """
 580   if lu.cfg.GetNodeInfo(node).drained:
 581     raise errors.OpPrereqError("Can't use drained node %s" % node,
 582                                errors.ECODE_INVAL)
 583
 584
 585 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 586   """Ensure that a node supports a given OS.
 587
 588   @param lu: the LU on behalf of which we make the check
 589   @param node: the node to check
 590   @param os_name: the OS to query about
 591   @param force_variant: whether to ignore variant errors
 592   @raise errors.OpPrereqError: if the node is not supporting the OS
 593
 594   """
 595   result = lu.rpc.call_os_get(node, os_name)
 596   result.Raise("OS '%s' not in supported OS list for node %s" %
 597                (os_name, node),
 598                prereq=True, ecode=errors.ECODE_INVAL)
 599   if not force_variant:
 600     _CheckOSVariant(result.payload, os_name)
 601
 602
 603 def _RequireFileStorage():
 604   """Checks that file storage is enabled.
 605
 606   @raise errors.OpPrereqError: when file storage is disabled
 607
 608   """
 609   if not constants.ENABLE_FILE_STORAGE:
 610     raise errors.OpPrereqError("File storage disabled at configure time",
 611                                errors.ECODE_INVAL)
 612
 613
 614 def _CheckDiskTemplate(template):
 615   """Ensure a given disk template is valid.
 616
 617   """
 618   if template not in constants.DISK_TEMPLATES:
 619     msg = ("Invalid disk template name '%s', valid templates are: %s" %
 620            (template, utils.CommaJoin(constants.DISK_TEMPLATES)))
 621     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 622   if template == constants.DT_FILE:
 623     _RequireFileStorage()
 624
 625
 626 def _CheckStorageType(storage_type):
 627   """Ensure a given storage type is valid.
 628
 629   """
 630   if storage_type not in constants.VALID_STORAGE_TYPES:
 631     raise errors.OpPrereqError("Unknown storage type: %s" % storage_type,
 632                                errors.ECODE_INVAL)
 633   if storage_type == constants.ST_FILE:
 634     _RequireFileStorage()
 635
 636
 637 def _GetClusterDomainSecret():
 638   """Reads the cluster domain secret.
 639
 640   """
 641   return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
 642                                strict=True)
 643
 644
 645 def _CheckInstanceDown(lu, instance, reason):
 646   """Ensure that an instance is not running."""
 647   if instance.admin_up:
 648     raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
 649                                (instance.name, reason), errors.ECODE_STATE)
 650
 651   pnode = instance.primary_node
 652   ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
 653   ins_l.Raise("Can't contact node %s for instance information" % pnode,
 654               prereq=True, ecode=errors.ECODE_ENVIRON)
 655
 656   if instance.name in ins_l.payload:
 657     raise errors.OpPrereqError("Instance %s is running, %s" %
 658                                (instance.name, reason), errors.ECODE_STATE)
 659
 660
 661 def _ExpandItemName(fn, name, kind):
 662   """Expand an item name.
 663
 664   @param fn: the function to use for expansion
 665   @param name: requested item name
 666   @param kind: text description ('Node' or 'Instance')
 667   @return: the resolved (full) name
 668   @raise errors.OpPrereqError: if the item is not found
 669
 670   """
 671   full_name = fn(name)
 672   if full_name is None:
 673     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
 674                                errors.ECODE_NOENT)
 675   return full_name
 676
 677
 678 def _ExpandNodeName(cfg, name):
 679   """Wrapper over L{_ExpandItemName} for nodes."""
 680   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
 681
 682
 683 def _ExpandInstanceName(cfg, name):
 684   """Wrapper over L{_ExpandItemName} for instance."""
 685   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
 686
 687
 688 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 689                           memory, vcpus, nics, disk_template, disks,
 690                           bep, hvp, hypervisor_name):
 691   """Builds instance related env variables for hooks
 692
 693   This builds the hook environment from individual variables.
 694
 695   @type name: string
 696   @param name: the name of the instance
 697   @type primary_node: string
 698   @param primary_node: the name of the instance's primary node
 699   @type secondary_nodes: list
 700   @param secondary_nodes: list of secondary nodes as strings
 701   @type os_type: string
 702   @param os_type: the name of the instance's OS
 703   @type status: boolean
 704   @param status: the should_run status of the instance
 705   @type memory: string
 706   @param memory: the memory size of the instance
 707   @type vcpus: string
 708   @param vcpus: the count of VCPUs the instance has
 709   @type nics: list
 710   @param nics: list of tuples (ip, mac, mode, link) representing
 711       the NICs the instance has
 712   @type disk_template: string
 713   @param disk_template: the disk template of the instance
 714   @type disks: list
 715   @param disks: the list of (size, mode) pairs
 716   @type bep: dict
 717   @param bep: the backend parameters for the instance
 718   @type hvp: dict
 719   @param hvp: the hypervisor parameters for the instance
 720   @type hypervisor_name: string
 721   @param hypervisor_name: the hypervisor for the instance
 722   @rtype: dict
 723   @return: the hook environment for this instance
 724
 725   """
 726   if status:
 727     str_status = "up"
 728   else:
 729     str_status = "down"
 730   env = {
 731     "OP_TARGET": name,
 732     "INSTANCE_NAME": name,
 733     "INSTANCE_PRIMARY": primary_node,
 734     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
 735     "INSTANCE_OS_TYPE": os_type,
 736     "INSTANCE_STATUS": str_status,
 737     "INSTANCE_MEMORY": memory,
 738     "INSTANCE_VCPUS": vcpus,
 739     "INSTANCE_DISK_TEMPLATE": disk_template,
 740     "INSTANCE_HYPERVISOR": hypervisor_name,
 741   }
 742
 743   if nics:
 744     nic_count = len(nics)
 745     for idx, (ip, mac, mode, link) in enumerate(nics):
 746       if ip is None:
 747         ip = ""
 748       env["INSTANCE_NIC%d_IP" % idx] = ip
 749       env["INSTANCE_NIC%d_MAC" % idx] = mac
 750       env["INSTANCE_NIC%d_MODE" % idx] = mode
 751       env["INSTANCE_NIC%d_LINK" % idx] = link
 752       if mode == constants.NIC_MODE_BRIDGED:
 753         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
 754   else:
 755     nic_count = 0
 756
 757   env["INSTANCE_NIC_COUNT"] = nic_count
 758
 759   if disks:
 760     disk_count = len(disks)
 761     for idx, (size, mode) in enumerate(disks):
 762       env["INSTANCE_DISK%d_SIZE" % idx] = size
 763       env["INSTANCE_DISK%d_MODE" % idx] = mode
 764   else:
 765     disk_count = 0
 766
 767   env["INSTANCE_DISK_COUNT"] = disk_count
 768
 769   for source, kind in [(bep, "BE"), (hvp, "HV")]:
 770     for key, value in source.items():
 771       env["INSTANCE_%s_%s" % (kind, key)] = value
 772
 773   return env
 774
 775
 776 def _NICListToTuple(lu, nics):
 777   """Build a list of nic information tuples.
 778
 779   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
 780   value in LUQueryInstanceData.
 781
 782   @type lu:  L{LogicalUnit}
 783   @param lu: the logical unit on whose behalf we execute
 784   @type nics: list of L{objects.NIC}
 785   @param nics: list of nics to convert to hooks tuples
 786
 787   """
 788   hooks_nics = []
 789   cluster = lu.cfg.GetClusterInfo()
 790   for nic in nics:
 791     ip = nic.ip
 792     mac = nic.mac
 793     filled_params = cluster.SimpleFillNIC(nic.nicparams)
 794     mode = filled_params[constants.NIC_MODE]
 795     link = filled_params[constants.NIC_LINK]
 796     hooks_nics.append((ip, mac, mode, link))
 797   return hooks_nics
 798
 799
 800 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
 801   """Builds instance related env variables for hooks from an object.
 802
 803   @type lu: L{LogicalUnit}
 804   @param lu: the logical unit on whose behalf we execute
 805   @type instance: L{objects.Instance}
 806   @param instance: the instance for which we should build the
 807       environment
 808   @type override: dict
 809   @param override: dictionary with key/values that will override
 810       our values
 811   @rtype: dict
 812   @return: the hook environment dictionary
 813
 814   """
 815   cluster = lu.cfg.GetClusterInfo()
 816   bep = cluster.FillBE(instance)
 817   hvp = cluster.FillHV(instance)
 818   args = {
 819     'name': instance.name,
 820     'primary_node': instance.primary_node,
 821     'secondary_nodes': instance.secondary_nodes,
 822     'os_type': instance.os,
 823     'status': instance.admin_up,
 824     'memory': bep[constants.BE_MEMORY],
 825     'vcpus': bep[constants.BE_VCPUS],
 826     'nics': _NICListToTuple(lu, instance.nics),
 827     'disk_template': instance.disk_template,
 828     'disks': [(disk.size, disk.mode) for disk in instance.disks],
 829     'bep': bep,
 830     'hvp': hvp,
 831     'hypervisor_name': instance.hypervisor,
 832   }
 833   if override:
 834     args.update(override)
 835   return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
 836
 837
 838 def _AdjustCandidatePool(lu, exceptions):
 839   """Adjust the candidate pool after node operations.
 840
 841   """
 842   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
 843   if mod_list:
 844     lu.LogInfo("Promoted nodes to master candidate role: %s",
 845                utils.CommaJoin(node.name for node in mod_list))
 846     for name in mod_list:
 847       lu.context.ReaddNode(name)
 848   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 849   if mc_now > mc_max:
 850     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
 851                (mc_now, mc_max))
 852
 853
 854 def _DecideSelfPromotion(lu, exceptions=None):
 855   """Decide whether I should promote myself as a master candidate.
 856
 857   """
 858   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
 859   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
 860   # the new node will increase mc_max with one, so:
 861   mc_should = min(mc_should + 1, cp_size)
 862   return mc_now < mc_should
 863
 864
 865 def _CheckNicsBridgesExist(lu, target_nics, target_node):
 866   """Check that the brigdes needed by a list of nics exist.
 867
 868   """
 869   cluster = lu.cfg.GetClusterInfo()
 870   paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
 871   brlist = [params[constants.NIC_LINK] for params in paramslist
 872             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
 873   if brlist:
 874     result = lu.rpc.call_bridges_exist(target_node, brlist)
 875     result.Raise("Error checking bridges on destination node '%s'" %
 876                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
 877
 878
 879 def _CheckInstanceBridgesExist(lu, instance, node=None):
 880   """Check that the brigdes needed by an instance exist.
 881
 882   """
 883   if node is None:
 884     node = instance.primary_node
 885   _CheckNicsBridgesExist(lu, instance.nics, node)
 886
 887
 888 def _CheckOSVariant(os_obj, name):
 889   """Check whether an OS name conforms to the os variants specification.
 890
 891   @type os_obj: L{objects.OS}
 892   @param os_obj: OS object to check
 893   @type name: string
 894   @param name: OS name passed by the user, to check for validity
 895
 896   """
 897   if not os_obj.supported_variants:
 898     return
 899   try:
 900     variant = name.split("+", 1)[1]
 901   except IndexError:
 902     raise errors.OpPrereqError("OS name must include a variant",
 903                                errors.ECODE_INVAL)
 904
 905   if variant not in os_obj.supported_variants:
 906     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
 907
 908
 909 def _GetNodeInstancesInner(cfg, fn):
 910   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
 911
 912
 913 def _GetNodeInstances(cfg, node_name):
 914   """Returns a list of all primary and secondary instances on a node.
 915
 916   """
 917
 918   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
 919
 920
 921 def _GetNodePrimaryInstances(cfg, node_name):
 922   """Returns primary instances on a node.
 923
 924   """
 925   return _GetNodeInstancesInner(cfg,
 926                                 lambda inst: node_name == inst.primary_node)
 927
 928
 929 def _GetNodeSecondaryInstances(cfg, node_name):
 930   """Returns secondary instances on a node.
 931
 932   """
 933   return _GetNodeInstancesInner(cfg,
 934                                 lambda inst: node_name in inst.secondary_nodes)
 935
 936
 937 def _GetStorageTypeArgs(cfg, storage_type):
 938   """Returns the arguments for a storage type.
 939
 940   """
 941   # Special case for file storage
 942   if storage_type == constants.ST_FILE:
 943     # storage.FileStorage wants a list of storage directories
 944     return [[cfg.GetFileStorageDir()]]
 945
 946   return []
 947
 948
 949 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
 950   faulty = []
 951
 952   for dev in instance.disks:
 953     cfg.SetDiskID(dev, node_name)
 954
 955   result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
 956   result.Raise("Failed to get disk status from node %s" % node_name,
 957                prereq=prereq, ecode=errors.ECODE_ENVIRON)
 958
 959   for idx, bdev_status in enumerate(result.payload):
 960     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
 961       faulty.append(idx)
 962
 963   return faulty
 964
 965
 966 class LUPostInitCluster(LogicalUnit):
 967   """Logical unit for running hooks after cluster initialization.
 968
 969   """
 970   HPATH = "cluster-init"
 971   HTYPE = constants.HTYPE_CLUSTER
 972   _OP_REQP = []
 973
 974   def BuildHooksEnv(self):
 975     """Build hooks env.
 976
 977     """
 978     env = {"OP_TARGET": self.cfg.GetClusterName()}
 979     mn = self.cfg.GetMasterNode()
 980     return env, [], [mn]
 981
 982   def CheckPrereq(self):
 983     """No prerequisites to check.
 984
 985     """
 986     return True
 987
 988   def Exec(self, feedback_fn):
 989     """Nothing to do.
 990
 991     """
 992     return True
 993
 994
 995 class LUDestroyCluster(LogicalUnit):
 996   """Logical unit for destroying the cluster.
 997
 998   """
 999   HPATH = "cluster-destroy"
1000   HTYPE = constants.HTYPE_CLUSTER
1001   _OP_REQP = []
1002
1003   def BuildHooksEnv(self):
1004     """Build hooks env.
1005
1006     """
1007     env = {"OP_TARGET": self.cfg.GetClusterName()}
1008     return env, [], []
1009
1010   def CheckPrereq(self):
1011     """Check prerequisites.
1012
1013     This checks whether the cluster is empty.
1014
1015     Any errors are signaled by raising errors.OpPrereqError.
1016
1017     """
1018     master = self.cfg.GetMasterNode()
1019
1020     nodelist = self.cfg.GetNodeList()
1021     if len(nodelist) != 1 or nodelist[0] != master:
1022       raise errors.OpPrereqError("There are still %d node(s) in"
1023                                  " this cluster." % (len(nodelist) - 1),
1024                                  errors.ECODE_INVAL)
1025     instancelist = self.cfg.GetInstanceList()
1026     if instancelist:
1027       raise errors.OpPrereqError("There are still %d instance(s) in"
1028                                  " this cluster." % len(instancelist),
1029                                  errors.ECODE_INVAL)
1030
1031   def Exec(self, feedback_fn):
1032     """Destroys the cluster.
1033
1034     """
1035     master = self.cfg.GetMasterNode()
1036     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
1037
1038     # Run post hooks on master node before it's removed
1039     hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
1040     try:
1041       hm.RunPhase(constants.HOOKS_PHASE_POST, [master])
1042     except:
1043       # pylint: disable-msg=W0702
1044       self.LogWarning("Errors occurred running hooks on %s" % master)
1045
1046     result = self.rpc.call_node_stop_master(master, False)
1047     result.Raise("Could not disable the master role")
1048
1049     if modify_ssh_setup:
1050       priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
1051       utils.CreateBackup(priv_key)
1052       utils.CreateBackup(pub_key)
1053
1054     return master
1055
1056
1057 def _VerifyCertificate(filename):
1058   """Verifies a certificate for LUVerifyCluster.
1059
1060   @type filename: string
1061   @param filename: Path to PEM file
1062
1063   """
1064   try:
1065     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1066                                            utils.ReadFile(filename))
1067   except Exception, err: # pylint: disable-msg=W0703
1068     return (LUVerifyCluster.ETYPE_ERROR,
1069             "Failed to load X509 certificate %s: %s" % (filename, err))
1070
1071   (errcode, msg) = \
1072     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1073                                 constants.SSL_CERT_EXPIRATION_ERROR)
1074
1075   if msg:
1076     fnamemsg = "While verifying %s: %s" % (filename, msg)
1077   else:
1078     fnamemsg = None
1079
1080   if errcode is None:
1081     return (None, fnamemsg)
1082   elif errcode == utils.CERT_WARNING:
1083     return (LUVerifyCluster.ETYPE_WARNING, fnamemsg)
1084   elif errcode == utils.CERT_ERROR:
1085     return (LUVerifyCluster.ETYPE_ERROR, fnamemsg)
1086
1087   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1088
1089
1090 class LUVerifyCluster(LogicalUnit):
1091   """Verifies the cluster status.
1092
1093   """
1094   HPATH = "cluster-verify"
1095   HTYPE = constants.HTYPE_CLUSTER
1096   _OP_REQP = ["skip_checks", "verbose", "error_codes", "debug_simulate_errors"]
1097   REQ_BGL = False
1098
1099   TCLUSTER = "cluster"
1100   TNODE = "node"
1101   TINSTANCE = "instance"
1102
1103   ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1104   ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1105   EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1106   EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1107   EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1108   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1109   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1110   EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1111   ENODEDRBD = (TNODE, "ENODEDRBD")
1112   ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1113   ENODEHOOKS = (TNODE, "ENODEHOOKS")
1114   ENODEHV = (TNODE, "ENODEHV")
1115   ENODELVM = (TNODE, "ENODELVM")
1116   ENODEN1 = (TNODE, "ENODEN1")
1117   ENODENET = (TNODE, "ENODENET")
1118   ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1119   ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1120   ENODERPC = (TNODE, "ENODERPC")
1121   ENODESSH = (TNODE, "ENODESSH")
1122   ENODEVERSION = (TNODE, "ENODEVERSION")
1123   ENODESETUP = (TNODE, "ENODESETUP")
1124   ENODETIME = (TNODE, "ENODETIME")
1125
1126   ETYPE_FIELD = "code"
1127   ETYPE_ERROR = "ERROR"
1128   ETYPE_WARNING = "WARNING"
1129
1130   class NodeImage(object):
1131     """A class representing the logical and physical status of a node.
1132
1133     @ivar volumes: a structure as returned from
1134         L{ganeti.backend.GetVolumeList} (runtime)
1135     @ivar instances: a list of running instances (runtime)
1136     @ivar pinst: list of configured primary instances (config)
1137     @ivar sinst: list of configured secondary instances (config)
1138     @ivar sbp: diction of {secondary-node: list of instances} of all peers
1139         of this node (config)
1140     @ivar mfree: free memory, as reported by hypervisor (runtime)
1141     @ivar dfree: free disk, as reported by the node (runtime)
1142     @ivar offline: the offline status (config)
1143     @type rpc_fail: boolean
1144     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1145         not whether the individual keys were correct) (runtime)
1146     @type lvm_fail: boolean
1147     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1148     @type hyp_fail: boolean
1149     @ivar hyp_fail: whether the RPC call didn't return the instance list
1150     @type ghost: boolean
1151     @ivar ghost: whether this is a known node or not (config)
1152
1153     """
1154     def __init__(self, offline=False):
1155       self.volumes = {}
1156       self.instances = []
1157       self.pinst = []
1158       self.sinst = []
1159       self.sbp = {}
1160       self.mfree = 0
1161       self.dfree = 0
1162       self.offline = offline
1163       self.rpc_fail = False
1164       self.lvm_fail = False
1165       self.hyp_fail = False
1166       self.ghost = False
1167
1168   def ExpandNames(self):
1169     self.needed_locks = {
1170       locking.LEVEL_NODE: locking.ALL_SET,
1171       locking.LEVEL_INSTANCE: locking.ALL_SET,
1172     }
1173     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1174
1175   def _Error(self, ecode, item, msg, *args, **kwargs):
1176     """Format an error message.
1177
1178     Based on the opcode's error_codes parameter, either format a
1179     parseable error code, or a simpler error string.
1180
1181     This must be called only from Exec and functions called from Exec.
1182
1183     """
1184     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1185     itype, etxt = ecode
1186     # first complete the msg
1187     if args:
1188       msg = msg % args
1189     # then format the whole message
1190     if self.op.error_codes:
1191       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1192     else:
1193       if item:
1194         item = " " + item
1195       else:
1196         item = ""
1197       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1198     # and finally report it via the feedback_fn
1199     self._feedback_fn("  - %s" % msg)
1200
1201   def _ErrorIf(self, cond, *args, **kwargs):
1202     """Log an error message if the passed condition is True.
1203
1204     """
1205     cond = bool(cond) or self.op.debug_simulate_errors
1206     if cond:
1207       self._Error(*args, **kwargs)
1208     # do not mark the operation as failed for WARN cases only
1209     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1210       self.bad = self.bad or cond
1211
1212   def _VerifyNode(self, ninfo, nresult):
1213     """Run multiple tests against a node.
1214
1215     Test list:
1216
1217       - compares ganeti version
1218       - checks vg existence and size > 20G
1219       - checks config file checksum
1220       - checks ssh to other nodes
1221
1222     @type ninfo: L{objects.Node}
1223     @param ninfo: the node to check
1224     @param nresult: the results from the node
1225     @rtype: boolean
1226     @return: whether overall this call was successful (and we can expect
1227          reasonable values in the respose)
1228
1229     """
1230     node = ninfo.name
1231     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1232
1233     # main result, nresult should be a non-empty dict
1234     test = not nresult or not isinstance(nresult, dict)
1235     _ErrorIf(test, self.ENODERPC, node,
1236                   "unable to verify node: no data returned")
1237     if test:
1238       return False
1239
1240     # compares ganeti version
1241     local_version = constants.PROTOCOL_VERSION
1242     remote_version = nresult.get("version", None)
1243     test = not (remote_version and
1244                 isinstance(remote_version, (list, tuple)) and
1245                 len(remote_version) == 2)
1246     _ErrorIf(test, self.ENODERPC, node,
1247              "connection to node returned invalid data")
1248     if test:
1249       return False
1250
1251     test = local_version != remote_version[0]
1252     _ErrorIf(test, self.ENODEVERSION, node,
1253              "incompatible protocol versions: master %s,"
1254              " node %s", local_version, remote_version[0])
1255     if test:
1256       return False
1257
1258     # node seems compatible, we can actually try to look into its results
1259
1260     # full package version
1261     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1262                   self.ENODEVERSION, node,
1263                   "software version mismatch: master %s, node %s",
1264                   constants.RELEASE_VERSION, remote_version[1],
1265                   code=self.ETYPE_WARNING)
1266
1267     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1268     if isinstance(hyp_result, dict):
1269       for hv_name, hv_result in hyp_result.iteritems():
1270         test = hv_result is not None
1271         _ErrorIf(test, self.ENODEHV, node,
1272                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1273
1274
1275     test = nresult.get(constants.NV_NODESETUP,
1276                            ["Missing NODESETUP results"])
1277     _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1278              "; ".join(test))
1279
1280     return True
1281
1282   def _VerifyNodeTime(self, ninfo, nresult,
1283                       nvinfo_starttime, nvinfo_endtime):
1284     """Check the node time.
1285
1286     @type ninfo: L{objects.Node}
1287     @param ninfo: the node to check
1288     @param nresult: the remote results for the node
1289     @param nvinfo_starttime: the start time of the RPC call
1290     @param nvinfo_endtime: the end time of the RPC call
1291
1292     """
1293     node = ninfo.name
1294     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1295
1296     ntime = nresult.get(constants.NV_TIME, None)
1297     try:
1298       ntime_merged = utils.MergeTime(ntime)
1299     except (ValueError, TypeError):
1300       _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1301       return
1302
1303     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1304       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1305     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1306       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1307     else:
1308       ntime_diff = None
1309
1310     _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1311              "Node time diverges by at least %s from master node time",
1312              ntime_diff)
1313
1314   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1315     """Check the node time.
1316
1317     @type ninfo: L{objects.Node}
1318     @param ninfo: the node to check
1319     @param nresult: the remote results for the node
1320     @param vg_name: the configured VG name
1321
1322     """
1323     if vg_name is None:
1324       return
1325
1326     node = ninfo.name
1327     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1328
1329     # checks vg existence and size > 20G
1330     vglist = nresult.get(constants.NV_VGLIST, None)
1331     test = not vglist
1332     _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1333     if not test:
1334       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1335                                             constants.MIN_VG_SIZE)
1336       _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1337
1338     # check pv names
1339     pvlist = nresult.get(constants.NV_PVLIST, None)
1340     test = pvlist is None
1341     _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1342     if not test:
1343       # check that ':' is not present in PV names, since it's a
1344       # special character for lvcreate (denotes the range of PEs to
1345       # use on the PV)
1346       for _, pvname, owner_vg in pvlist:
1347         test = ":" in pvname
1348         _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1349                  " '%s' of VG '%s'", pvname, owner_vg)
1350
1351   def _VerifyNodeNetwork(self, ninfo, nresult):
1352     """Check the node time.
1353
1354     @type ninfo: L{objects.Node}
1355     @param ninfo: the node to check
1356     @param nresult: the remote results for the node
1357
1358     """
1359     node = ninfo.name
1360     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1361
1362     test = constants.NV_NODELIST not in nresult
1363     _ErrorIf(test, self.ENODESSH, node,
1364              "node hasn't returned node ssh connectivity data")
1365     if not test:
1366       if nresult[constants.NV_NODELIST]:
1367         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1368           _ErrorIf(True, self.ENODESSH, node,
1369                    "ssh communication with node '%s': %s", a_node, a_msg)
1370
1371     test = constants.NV_NODENETTEST not in nresult
1372     _ErrorIf(test, self.ENODENET, node,
1373              "node hasn't returned node tcp connectivity data")
1374     if not test:
1375       if nresult[constants.NV_NODENETTEST]:
1376         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1377         for anode in nlist:
1378           _ErrorIf(True, self.ENODENET, node,
1379                    "tcp communication with node '%s': %s",
1380                    anode, nresult[constants.NV_NODENETTEST][anode])
1381
1382     test = constants.NV_MASTERIP not in nresult
1383     _ErrorIf(test, self.ENODENET, node,
1384              "node hasn't returned node master IP reachability data")
1385     if not test:
1386       if not nresult[constants.NV_MASTERIP]:
1387         if node == self.master_node:
1388           msg = "the master node cannot reach the master IP (not configured?)"
1389         else:
1390           msg = "cannot reach the master IP"
1391         _ErrorIf(True, self.ENODENET, node, msg)
1392
1393
1394   def _VerifyInstance(self, instance, instanceconfig, node_image):
1395     """Verify an instance.
1396
1397     This function checks to see if the required block devices are
1398     available on the instance's node.
1399
1400     """
1401     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1402     node_current = instanceconfig.primary_node
1403
1404     node_vol_should = {}
1405     instanceconfig.MapLVsByNode(node_vol_should)
1406
1407     for node in node_vol_should:
1408       n_img = node_image[node]
1409       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1410         # ignore missing volumes on offline or broken nodes
1411         continue
1412       for volume in node_vol_should[node]:
1413         test = volume not in n_img.volumes
1414         _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1415                  "volume %s missing on node %s", volume, node)
1416
1417     if instanceconfig.admin_up:
1418       pri_img = node_image[node_current]
1419       test = instance not in pri_img.instances and not pri_img.offline
1420       _ErrorIf(test, self.EINSTANCEDOWN, instance,
1421                "instance not running on its primary node %s",
1422                node_current)
1423
1424     for node, n_img in node_image.items():
1425       if (not node == node_current):
1426         test = instance in n_img.instances
1427         _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1428                  "instance should not run on node %s", node)
1429
1430   def _VerifyOrphanVolumes(self, node_vol_should, node_image):
1431     """Verify if there are any unknown volumes in the cluster.
1432
1433     The .os, .swap and backup volumes are ignored. All other volumes are
1434     reported as unknown.
1435
1436     """
1437     for node, n_img in node_image.items():
1438       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1439         # skip non-healthy nodes
1440         continue
1441       for volume in n_img.volumes:
1442         test = (node not in node_vol_should or
1443                 volume not in node_vol_should[node])
1444         self._ErrorIf(test, self.ENODEORPHANLV, node,
1445                       "volume %s is unknown", volume)
1446
1447   def _VerifyOrphanInstances(self, instancelist, node_image):
1448     """Verify the list of running instances.
1449
1450     This checks what instances are running but unknown to the cluster.
1451
1452     """
1453     for node, n_img in node_image.items():
1454       for o_inst in n_img.instances:
1455         test = o_inst not in instancelist
1456         self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1457                       "instance %s on node %s should not exist", o_inst, node)
1458
1459   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1460     """Verify N+1 Memory Resilience.
1461
1462     Check that if one single node dies we can still start all the
1463     instances it was primary for.
1464
1465     """
1466     for node, n_img in node_image.items():
1467       # This code checks that every node which is now listed as
1468       # secondary has enough memory to host all instances it is
1469       # supposed to should a single other node in the cluster fail.
1470       # FIXME: not ready for failover to an arbitrary node
1471       # FIXME: does not support file-backed instances
1472       # WARNING: we currently take into account down instances as well
1473       # as up ones, considering that even if they're down someone
1474       # might want to start them even in the event of a node failure.
1475       for prinode, instances in n_img.sbp.items():
1476         needed_mem = 0
1477         for instance in instances:
1478           bep = self.cfg.GetClusterInfo().FillBE(instance_cfg[instance])
1479           if bep[constants.BE_AUTO_BALANCE]:
1480             needed_mem += bep[constants.BE_MEMORY]
1481         test = n_img.mfree < needed_mem
1482         self._ErrorIf(test, self.ENODEN1, node,
1483                       "not enough memory on to accommodate"
1484                       " failovers should peer node %s fail", prinode)
1485
1486   def _VerifyNodeFiles(self, ninfo, nresult, file_list, local_cksum,
1487                        master_files):
1488     """Verifies and computes the node required file checksums.
1489
1490     @type ninfo: L{objects.Node}
1491     @param ninfo: the node to check
1492     @param nresult: the remote results for the node
1493     @param file_list: required list of files
1494     @param local_cksum: dictionary of local files and their checksums
1495     @param master_files: list of files that only masters should have
1496
1497     """
1498     node = ninfo.name
1499     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1500
1501     remote_cksum = nresult.get(constants.NV_FILELIST, None)
1502     test = not isinstance(remote_cksum, dict)
1503     _ErrorIf(test, self.ENODEFILECHECK, node,
1504              "node hasn't returned file checksum data")
1505     if test:
1506       return
1507
1508     for file_name in file_list:
1509       node_is_mc = ninfo.master_candidate
1510       must_have = (file_name not in master_files) or node_is_mc
1511       # missing
1512       test1 = file_name not in remote_cksum
1513       # invalid checksum
1514       test2 = not test1 and remote_cksum[file_name] != local_cksum[file_name]
1515       # existing and good
1516       test3 = not test1 and remote_cksum[file_name] == local_cksum[file_name]
1517       _ErrorIf(test1 and must_have, self.ENODEFILECHECK, node,
1518                "file '%s' missing", file_name)
1519       _ErrorIf(test2 and must_have, self.ENODEFILECHECK, node,
1520                "file '%s' has wrong checksum", file_name)
1521       # not candidate and this is not a must-have file
1522       _ErrorIf(test2 and not must_have, self.ENODEFILECHECK, node,
1523                "file '%s' should not exist on non master"
1524                " candidates (and the file is outdated)", file_name)
1525       # all good, except non-master/non-must have combination
1526       _ErrorIf(test3 and not must_have, self.ENODEFILECHECK, node,
1527                "file '%s' should not exist"
1528                " on non master candidates", file_name)
1529
1530   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_map):
1531     """Verifies and the node DRBD status.
1532
1533     @type ninfo: L{objects.Node}
1534     @param ninfo: the node to check
1535     @param nresult: the remote results for the node
1536     @param instanceinfo: the dict of instances
1537     @param drbd_map: the DRBD map as returned by
1538         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1539
1540     """
1541     node = ninfo.name
1542     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1543
1544     # compute the DRBD minors
1545     node_drbd = {}
1546     for minor, instance in drbd_map[node].items():
1547       test = instance not in instanceinfo
1548       _ErrorIf(test, self.ECLUSTERCFG, None,
1549                "ghost instance '%s' in temporary DRBD map", instance)
1550         # ghost instance should not be running, but otherwise we
1551         # don't give double warnings (both ghost instance and
1552         # unallocated minor in use)
1553       if test:
1554         node_drbd[minor] = (instance, False)
1555       else:
1556         instance = instanceinfo[instance]
1557         node_drbd[minor] = (instance.name, instance.admin_up)
1558
1559     # and now check them
1560     used_minors = nresult.get(constants.NV_DRBDLIST, [])
1561     test = not isinstance(used_minors, (tuple, list))
1562     _ErrorIf(test, self.ENODEDRBD, node,
1563              "cannot parse drbd status file: %s", str(used_minors))
1564     if test:
1565       # we cannot check drbd status
1566       return
1567
1568     for minor, (iname, must_exist) in node_drbd.items():
1569       test = minor not in used_minors and must_exist
1570       _ErrorIf(test, self.ENODEDRBD, node,
1571                "drbd minor %d of instance %s is not active", minor, iname)
1572     for minor in used_minors:
1573       test = minor not in node_drbd
1574       _ErrorIf(test, self.ENODEDRBD, node,
1575                "unallocated drbd minor %d is in use", minor)
1576
1577   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1578     """Verifies and updates the node volume data.
1579
1580     This function will update a L{NodeImage}'s internal structures
1581     with data from the remote call.
1582
1583     @type ninfo: L{objects.Node}
1584     @param ninfo: the node to check
1585     @param nresult: the remote results for the node
1586     @param nimg: the node image object
1587     @param vg_name: the configured VG name
1588
1589     """
1590     node = ninfo.name
1591     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1592
1593     nimg.lvm_fail = True
1594     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
1595     if vg_name is None:
1596       pass
1597     elif isinstance(lvdata, basestring):
1598       _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
1599                utils.SafeEncode(lvdata))
1600     elif not isinstance(lvdata, dict):
1601       _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
1602     else:
1603       nimg.volumes = lvdata
1604       nimg.lvm_fail = False
1605
1606   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
1607     """Verifies and updates the node instance list.
1608
1609     If the listing was successful, then updates this node's instance
1610     list. Otherwise, it marks the RPC call as failed for the instance
1611     list key.
1612
1613     @type ninfo: L{objects.Node}
1614     @param ninfo: the node to check
1615     @param nresult: the remote results for the node
1616     @param nimg: the node image object
1617
1618     """
1619     idata = nresult.get(constants.NV_INSTANCELIST, None)
1620     test = not isinstance(idata, list)
1621     self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
1622                   " (instancelist): %s", utils.SafeEncode(str(idata)))
1623     if test:
1624       nimg.hyp_fail = True
1625     else:
1626       nimg.instances = idata
1627
1628   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
1629     """Verifies and computes a node information map
1630
1631     @type ninfo: L{objects.Node}
1632     @param ninfo: the node to check
1633     @param nresult: the remote results for the node
1634     @param nimg: the node image object
1635     @param vg_name: the configured VG name
1636
1637     """
1638     node = ninfo.name
1639     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1640
1641     # try to read free memory (from the hypervisor)
1642     hv_info = nresult.get(constants.NV_HVINFO, None)
1643     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
1644     _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
1645     if not test:
1646       try:
1647         nimg.mfree = int(hv_info["memory_free"])
1648       except (ValueError, TypeError):
1649         _ErrorIf(True, self.ENODERPC, node,
1650                  "node returned invalid nodeinfo, check hypervisor")
1651
1652     # FIXME: devise a free space model for file based instances as well
1653     if vg_name is not None:
1654       test = (constants.NV_VGLIST not in nresult or
1655               vg_name not in nresult[constants.NV_VGLIST])
1656       _ErrorIf(test, self.ENODELVM, node,
1657                "node didn't return data for the volume group '%s'"
1658                " - it is either missing or broken", vg_name)
1659       if not test:
1660         try:
1661           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
1662         except (ValueError, TypeError):
1663           _ErrorIf(True, self.ENODERPC, node,
1664                    "node returned invalid LVM info, check LVM status")
1665
1666   def CheckPrereq(self):
1667     """Check prerequisites.
1668
1669     Transform the list of checks we're going to skip into a set and check that
1670     all its members are valid.
1671
1672     """
1673     self.skip_set = frozenset(self.op.skip_checks)
1674     if not constants.VERIFY_OPTIONAL_CHECKS.issuperset(self.skip_set):
1675       raise errors.OpPrereqError("Invalid checks to be skipped specified",
1676                                  errors.ECODE_INVAL)
1677
1678   def BuildHooksEnv(self):
1679     """Build hooks env.
1680
1681     Cluster-Verify hooks just ran in the post phase and their failure makes
1682     the output be logged in the verify output and the verification to fail.
1683
1684     """
1685     all_nodes = self.cfg.GetNodeList()
1686     env = {
1687       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
1688       }
1689     for node in self.cfg.GetAllNodesInfo().values():
1690       env["NODE_TAGS_%s" % node.name] = " ".join(node.GetTags())
1691
1692     return env, [], all_nodes
1693
1694   def Exec(self, feedback_fn):
1695     """Verify integrity of cluster, performing various test on nodes.
1696
1697     """
1698     self.bad = False
1699     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1700     verbose = self.op.verbose
1701     self._feedback_fn = feedback_fn
1702     feedback_fn("* Verifying global settings")
1703     for msg in self.cfg.VerifyConfig():
1704       _ErrorIf(True, self.ECLUSTERCFG, None, msg)
1705
1706     # Check the cluster certificates
1707     for cert_filename in constants.ALL_CERT_FILES:
1708       (errcode, msg) = _VerifyCertificate(cert_filename)
1709       _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1710
1711     vg_name = self.cfg.GetVGName()
1712     hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
1713     cluster = self.cfg.GetClusterInfo()
1714     nodelist = utils.NiceSort(self.cfg.GetNodeList())
1715     nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
1716     instancelist = utils.NiceSort(self.cfg.GetInstanceList())
1717     instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
1718                         for iname in instancelist)
1719     i_non_redundant = [] # Non redundant instances
1720     i_non_a_balanced = [] # Non auto-balanced instances
1721     n_offline = 0 # Count of offline nodes
1722     n_drained = 0 # Count of nodes being drained
1723     node_vol_should = {}
1724
1725     # FIXME: verify OS list
1726     # do local checksums
1727     master_files = [constants.CLUSTER_CONF_FILE]
1728     master_node = self.master_node = self.cfg.GetMasterNode()
1729     master_ip = self.cfg.GetMasterIP()
1730
1731     file_names = ssconf.SimpleStore().GetFileList()
1732     file_names.extend(constants.ALL_CERT_FILES)
1733     file_names.extend(master_files)
1734     if cluster.modify_etc_hosts:
1735       file_names.append(constants.ETC_HOSTS)
1736
1737     local_checksums = utils.FingerprintFiles(file_names)
1738
1739     feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
1740     node_verify_param = {
1741       constants.NV_FILELIST: file_names,
1742       constants.NV_NODELIST: [node.name for node in nodeinfo
1743                               if not node.offline],
1744       constants.NV_HYPERVISOR: hypervisors,
1745       constants.NV_NODENETTEST: [(node.name, node.primary_ip,
1746                                   node.secondary_ip) for node in nodeinfo
1747                                  if not node.offline],
1748       constants.NV_INSTANCELIST: hypervisors,
1749       constants.NV_VERSION: None,
1750       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
1751       constants.NV_NODESETUP: None,
1752       constants.NV_TIME: None,
1753       constants.NV_MASTERIP: (master_node, master_ip),
1754       }
1755
1756     if vg_name is not None:
1757       node_verify_param[constants.NV_VGLIST] = None
1758       node_verify_param[constants.NV_LVLIST] = vg_name
1759       node_verify_param[constants.NV_PVLIST] = [vg_name]
1760       node_verify_param[constants.NV_DRBDLIST] = None
1761
1762     # Build our expected cluster state
1763     node_image = dict((node.name, self.NodeImage(offline=node.offline))
1764                       for node in nodeinfo)
1765
1766     for instance in instancelist:
1767       inst_config = instanceinfo[instance]
1768
1769       for nname in inst_config.all_nodes:
1770         if nname not in node_image:
1771           # ghost node
1772           gnode = self.NodeImage()
1773           gnode.ghost = True
1774           node_image[nname] = gnode
1775
1776       inst_config.MapLVsByNode(node_vol_should)
1777
1778       pnode = inst_config.primary_node
1779       node_image[pnode].pinst.append(instance)
1780
1781       for snode in inst_config.secondary_nodes:
1782         nimg = node_image[snode]
1783         nimg.sinst.append(instance)
1784         if pnode not in nimg.sbp:
1785           nimg.sbp[pnode] = []
1786         nimg.sbp[pnode].append(instance)
1787
1788     # At this point, we have the in-memory data structures complete,
1789     # except for the runtime information, which we'll gather next
1790
1791     # Due to the way our RPC system works, exact response times cannot be
1792     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
1793     # time before and after executing the request, we can at least have a time
1794     # window.
1795     nvinfo_starttime = time.time()
1796     all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
1797                                            self.cfg.GetClusterName())
1798     nvinfo_endtime = time.time()
1799
1800     all_drbd_map = self.cfg.ComputeDRBDMap()
1801
1802     feedback_fn("* Verifying node status")
1803     for node_i in nodeinfo:
1804       node = node_i.name
1805       nimg = node_image[node]
1806
1807       if node_i.offline:
1808         if verbose:
1809           feedback_fn("* Skipping offline node %s" % (node,))
1810         n_offline += 1
1811         continue
1812
1813       if node == master_node:
1814         ntype = "master"
1815       elif node_i.master_candidate:
1816         ntype = "master candidate"
1817       elif node_i.drained:
1818         ntype = "drained"
1819         n_drained += 1
1820       else:
1821         ntype = "regular"
1822       if verbose:
1823         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
1824
1825       msg = all_nvinfo[node].fail_msg
1826       _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
1827       if msg:
1828         nimg.rpc_fail = True
1829         continue
1830
1831       nresult = all_nvinfo[node].payload
1832
1833       nimg.call_ok = self._VerifyNode(node_i, nresult)
1834       self._VerifyNodeNetwork(node_i, nresult)
1835       self._VerifyNodeLVM(node_i, nresult, vg_name)
1836       self._VerifyNodeFiles(node_i, nresult, file_names, local_checksums,
1837                             master_files)
1838       self._VerifyNodeDrbd(node_i, nresult, instanceinfo, all_drbd_map)
1839       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
1840
1841       self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
1842       self._UpdateNodeInstances(node_i, nresult, nimg)
1843       self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
1844
1845     feedback_fn("* Verifying instance status")
1846     for instance in instancelist:
1847       if verbose:
1848         feedback_fn("* Verifying instance %s" % instance)
1849       inst_config = instanceinfo[instance]
1850       self._VerifyInstance(instance, inst_config, node_image)
1851       inst_nodes_offline = []
1852
1853       pnode = inst_config.primary_node
1854       pnode_img = node_image[pnode]
1855       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
1856                self.ENODERPC, pnode, "instance %s, connection to"
1857                " primary node failed", instance)
1858
1859       if pnode_img.offline:
1860         inst_nodes_offline.append(pnode)
1861
1862       # If the instance is non-redundant we cannot survive losing its primary
1863       # node, so we are not N+1 compliant. On the other hand we have no disk
1864       # templates with more than one secondary so that situation is not well
1865       # supported either.
1866       # FIXME: does not support file-backed instances
1867       if not inst_config.secondary_nodes:
1868         i_non_redundant.append(instance)
1869       _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
1870                instance, "instance has multiple secondary nodes: %s",
1871                utils.CommaJoin(inst_config.secondary_nodes),
1872                code=self.ETYPE_WARNING)
1873
1874       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
1875         i_non_a_balanced.append(instance)
1876
1877       for snode in inst_config.secondary_nodes:
1878         s_img = node_image[snode]
1879         _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
1880                  "instance %s, connection to secondary node failed", instance)
1881
1882         if s_img.offline:
1883           inst_nodes_offline.append(snode)
1884
1885       # warn that the instance lives on offline nodes
1886       _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
1887                "instance lives on offline node(s) %s",
1888                utils.CommaJoin(inst_nodes_offline))
1889       # ... or ghost nodes
1890       for node in inst_config.all_nodes:
1891         _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
1892                  "instance lives on ghost node %s", node)
1893
1894     feedback_fn("* Verifying orphan volumes")
1895     self._VerifyOrphanVolumes(node_vol_should, node_image)
1896
1897     feedback_fn("* Verifying orphan instances")
1898     self._VerifyOrphanInstances(instancelist, node_image)
1899
1900     if constants.VERIFY_NPLUSONE_MEM not in self.skip_set:
1901       feedback_fn("* Verifying N+1 Memory redundancy")
1902       self._VerifyNPlusOneMemory(node_image, instanceinfo)
1903
1904     feedback_fn("* Other Notes")
1905     if i_non_redundant:
1906       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
1907                   % len(i_non_redundant))
1908
1909     if i_non_a_balanced:
1910       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
1911                   % len(i_non_a_balanced))
1912
1913     if n_offline:
1914       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
1915
1916     if n_drained:
1917       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
1918
1919     return not self.bad
1920
1921   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
1922     """Analyze the post-hooks' result
1923
1924     This method analyses the hook result, handles it, and sends some
1925     nicely-formatted feedback back to the user.
1926
1927     @param phase: one of L{constants.HOOKS_PHASE_POST} or
1928         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
1929     @param hooks_results: the results of the multi-node hooks rpc call
1930     @param feedback_fn: function used send feedback back to the caller
1931     @param lu_result: previous Exec result
1932     @return: the new Exec result, based on the previous result
1933         and hook results
1934
1935     """
1936     # We only really run POST phase hooks, and are only interested in
1937     # their results
1938     if phase == constants.HOOKS_PHASE_POST:
1939       # Used to change hooks' output to proper indentation
1940       indent_re = re.compile('^', re.M)
1941       feedback_fn("* Hooks Results")
1942       assert hooks_results, "invalid result from hooks"
1943
1944       for node_name in hooks_results:
1945         res = hooks_results[node_name]
1946         msg = res.fail_msg
1947         test = msg and not res.offline
1948         self._ErrorIf(test, self.ENODEHOOKS, node_name,
1949                       "Communication failure in hooks execution: %s", msg)
1950         if res.offline or msg:
1951           # No need to investigate payload if node is offline or gave an error.
1952           # override manually lu_result here as _ErrorIf only
1953           # overrides self.bad
1954           lu_result = 1
1955           continue
1956         for script, hkr, output in res.payload:
1957           test = hkr == constants.HKR_FAIL
1958           self._ErrorIf(test, self.ENODEHOOKS, node_name,
1959                         "Script %s failed, output:", script)
1960           if test:
1961             output = indent_re.sub('      ', output)
1962             feedback_fn("%s" % output)
1963             lu_result = 0
1964
1965       return lu_result
1966
1967
1968 class LUVerifyDisks(NoHooksLU):
1969   """Verifies the cluster disks status.
1970
1971   """
1972   _OP_REQP = []
1973   REQ_BGL = False
1974
1975   def ExpandNames(self):
1976     self.needed_locks = {
1977       locking.LEVEL_NODE: locking.ALL_SET,
1978       locking.LEVEL_INSTANCE: locking.ALL_SET,
1979     }
1980     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1981
1982   def CheckPrereq(self):
1983     """Check prerequisites.
1984
1985     This has no prerequisites.
1986
1987     """
1988     pass
1989
1990   def Exec(self, feedback_fn):
1991     """Verify integrity of cluster disks.
1992
1993     @rtype: tuple of three items
1994     @return: a tuple of (dict of node-to-node_error, list of instances
1995         which need activate-disks, dict of instance: (node, volume) for
1996         missing volumes
1997
1998     """
1999     result = res_nodes, res_instances, res_missing = {}, [], {}
2000
2001     vg_name = self.cfg.GetVGName()
2002     nodes = utils.NiceSort(self.cfg.GetNodeList())
2003     instances = [self.cfg.GetInstanceInfo(name)
2004                  for name in self.cfg.GetInstanceList()]
2005
2006     nv_dict = {}
2007     for inst in instances:
2008       inst_lvs = {}
2009       if (not inst.admin_up or
2010           inst.disk_template not in constants.DTS_NET_MIRROR):
2011         continue
2012       inst.MapLVsByNode(inst_lvs)
2013       # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2014       for node, vol_list in inst_lvs.iteritems():
2015         for vol in vol_list:
2016           nv_dict[(node, vol)] = inst
2017
2018     if not nv_dict:
2019       return result
2020
2021     node_lvs = self.rpc.call_lv_list(nodes, vg_name)
2022
2023     for node in nodes:
2024       # node_volume
2025       node_res = node_lvs[node]
2026       if node_res.offline:
2027         continue
2028       msg = node_res.fail_msg
2029       if msg:
2030         logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2031         res_nodes[node] = msg
2032         continue
2033
2034       lvs = node_res.payload
2035       for lv_name, (_, _, lv_online) in lvs.items():
2036         inst = nv_dict.pop((node, lv_name), None)
2037         if (not lv_online and inst is not None
2038             and inst.name not in res_instances):
2039           res_instances.append(inst.name)
2040
2041     # any leftover items in nv_dict are missing LVs, let's arrange the
2042     # data better
2043     for key, inst in nv_dict.iteritems():
2044       if inst.name not in res_missing:
2045         res_missing[inst.name] = []
2046       res_missing[inst.name].append(key)
2047
2048     return result
2049
2050
2051 class LURepairDiskSizes(NoHooksLU):
2052   """Verifies the cluster disks sizes.
2053
2054   """
2055   _OP_REQP = ["instances"]
2056   REQ_BGL = False
2057
2058   def ExpandNames(self):
2059     if not isinstance(self.op.instances, list):
2060       raise errors.OpPrereqError("Invalid argument type 'instances'",
2061                                  errors.ECODE_INVAL)
2062
2063     if self.op.instances:
2064       self.wanted_names = []
2065       for name in self.op.instances:
2066         full_name = _ExpandInstanceName(self.cfg, name)
2067         self.wanted_names.append(full_name)
2068       self.needed_locks = {
2069         locking.LEVEL_NODE: [],
2070         locking.LEVEL_INSTANCE: self.wanted_names,
2071         }
2072       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2073     else:
2074       self.wanted_names = None
2075       self.needed_locks = {
2076         locking.LEVEL_NODE: locking.ALL_SET,
2077         locking.LEVEL_INSTANCE: locking.ALL_SET,
2078         }
2079     self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2080
2081   def DeclareLocks(self, level):
2082     if level == locking.LEVEL_NODE and self.wanted_names is not None:
2083       self._LockInstancesNodes(primary_only=True)
2084
2085   def CheckPrereq(self):
2086     """Check prerequisites.
2087
2088     This only checks the optional instance list against the existing names.
2089
2090     """
2091     if self.wanted_names is None:
2092       self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2093
2094     self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2095                              in self.wanted_names]
2096
2097   def _EnsureChildSizes(self, disk):
2098     """Ensure children of the disk have the needed disk size.
2099
2100     This is valid mainly for DRBD8 and fixes an issue where the
2101     children have smaller disk size.
2102
2103     @param disk: an L{ganeti.objects.Disk} object
2104
2105     """
2106     if disk.dev_type == constants.LD_DRBD8:
2107       assert disk.children, "Empty children for DRBD8?"
2108       fchild = disk.children[0]
2109       mismatch = fchild.size < disk.size
2110       if mismatch:
2111         self.LogInfo("Child disk has size %d, parent %d, fixing",
2112                      fchild.size, disk.size)
2113         fchild.size = disk.size
2114
2115       # and we recurse on this child only, not on the metadev
2116       return self._EnsureChildSizes(fchild) or mismatch
2117     else:
2118       return False
2119
2120   def Exec(self, feedback_fn):
2121     """Verify the size of cluster disks.
2122
2123     """
2124     # TODO: check child disks too
2125     # TODO: check differences in size between primary/secondary nodes
2126     per_node_disks = {}
2127     for instance in self.wanted_instances:
2128       pnode = instance.primary_node
2129       if pnode not in per_node_disks:
2130         per_node_disks[pnode] = []
2131       for idx, disk in enumerate(instance.disks):
2132         per_node_disks[pnode].append((instance, idx, disk))
2133
2134     changed = []
2135     for node, dskl in per_node_disks.items():
2136       newl = [v[2].Copy() for v in dskl]
2137       for dsk in newl:
2138         self.cfg.SetDiskID(dsk, node)
2139       result = self.rpc.call_blockdev_getsizes(node, newl)
2140       if result.fail_msg:
2141         self.LogWarning("Failure in blockdev_getsizes call to node"
2142                         " %s, ignoring", node)
2143         continue
2144       if len(result.data) != len(dskl):
2145         self.LogWarning("Invalid result from node %s, ignoring node results",
2146                         node)
2147         continue
2148       for ((instance, idx, disk), size) in zip(dskl, result.data):
2149         if size is None:
2150           self.LogWarning("Disk %d of instance %s did not return size"
2151                           " information, ignoring", idx, instance.name)
2152           continue
2153         if not isinstance(size, (int, long)):
2154           self.LogWarning("Disk %d of instance %s did not return valid"
2155                           " size information, ignoring", idx, instance.name)
2156           continue
2157         size = size >> 20
2158         if size != disk.size:
2159           self.LogInfo("Disk %d of instance %s has mismatched size,"
2160                        " correcting: recorded %d, actual %d", idx,
2161                        instance.name, disk.size, size)
2162           disk.size = size
2163           self.cfg.Update(instance, feedback_fn)
2164           changed.append((instance.name, idx, size))
2165         if self._EnsureChildSizes(disk):
2166           self.cfg.Update(instance, feedback_fn)
2167           changed.append((instance.name, idx, disk.size))
2168     return changed
2169
2170
2171 class LURenameCluster(LogicalUnit):
2172   """Rename the cluster.
2173
2174   """
2175   HPATH = "cluster-rename"
2176   HTYPE = constants.HTYPE_CLUSTER
2177   _OP_REQP = ["name"]
2178
2179   def BuildHooksEnv(self):
2180     """Build hooks env.
2181
2182     """
2183     env = {
2184       "OP_TARGET": self.cfg.GetClusterName(),
2185       "NEW_NAME": self.op.name,
2186       }
2187     mn = self.cfg.GetMasterNode()
2188     all_nodes = self.cfg.GetNodeList()
2189     return env, [mn], all_nodes
2190
2191   def CheckPrereq(self):
2192     """Verify that the passed name is a valid one.
2193
2194     """
2195     hostname = utils.GetHostInfo(self.op.name)
2196
2197     new_name = hostname.name
2198     self.ip = new_ip = hostname.ip
2199     old_name = self.cfg.GetClusterName()
2200     old_ip = self.cfg.GetMasterIP()
2201     if new_name == old_name and new_ip == old_ip:
2202       raise errors.OpPrereqError("Neither the name nor the IP address of the"
2203                                  " cluster has changed",
2204                                  errors.ECODE_INVAL)
2205     if new_ip != old_ip:
2206       if utils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2207         raise errors.OpPrereqError("The given cluster IP address (%s) is"
2208                                    " reachable on the network. Aborting." %
2209                                    new_ip, errors.ECODE_NOTUNIQUE)
2210
2211     self.op.name = new_name
2212
2213   def Exec(self, feedback_fn):
2214     """Rename the cluster.
2215
2216     """
2217     clustername = self.op.name
2218     ip = self.ip
2219
2220     # shutdown the master IP
2221     master = self.cfg.GetMasterNode()
2222     result = self.rpc.call_node_stop_master(master, False)
2223     result.Raise("Could not disable the master role")
2224
2225     try:
2226       cluster = self.cfg.GetClusterInfo()
2227       cluster.cluster_name = clustername
2228       cluster.master_ip = ip
2229       self.cfg.Update(cluster, feedback_fn)
2230
2231       # update the known hosts file
2232       ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2233       node_list = self.cfg.GetNodeList()
2234       try:
2235         node_list.remove(master)
2236       except ValueError:
2237         pass
2238       result = self.rpc.call_upload_file(node_list,
2239                                          constants.SSH_KNOWN_HOSTS_FILE)
2240       for to_node, to_result in result.iteritems():
2241         msg = to_result.fail_msg
2242         if msg:
2243           msg = ("Copy of file %s to node %s failed: %s" %
2244                  (constants.SSH_KNOWN_HOSTS_FILE, to_node, msg))
2245           self.proc.LogWarning(msg)
2246
2247     finally:
2248       result = self.rpc.call_node_start_master(master, False, False)
2249       msg = result.fail_msg
2250       if msg:
2251         self.LogWarning("Could not re-enable the master role on"
2252                         " the master, please restart manually: %s", msg)
2253
2254
2255 def _RecursiveCheckIfLVMBased(disk):
2256   """Check if the given disk or its children are lvm-based.
2257
2258   @type disk: L{objects.Disk}
2259   @param disk: the disk to check
2260   @rtype: boolean
2261   @return: boolean indicating whether a LD_LV dev_type was found or not
2262
2263   """
2264   if disk.children:
2265     for chdisk in disk.children:
2266       if _RecursiveCheckIfLVMBased(chdisk):
2267         return True
2268   return disk.dev_type == constants.LD_LV
2269
2270
2271 class LUSetClusterParams(LogicalUnit):
2272   """Change the parameters of the cluster.
2273
2274   """
2275   HPATH = "cluster-modify"
2276   HTYPE = constants.HTYPE_CLUSTER
2277   _OP_REQP = []
2278   REQ_BGL = False
2279
2280   def CheckArguments(self):
2281     """Check parameters
2282
2283     """
2284     for attr in ["candidate_pool_size",
2285                  "uid_pool", "add_uids", "remove_uids"]:
2286       if not hasattr(self.op, attr):
2287         setattr(self.op, attr, None)
2288
2289     if self.op.candidate_pool_size is not None:
2290       try:
2291         self.op.candidate_pool_size = int(self.op.candidate_pool_size)
2292       except (ValueError, TypeError), err:
2293         raise errors.OpPrereqError("Invalid candidate_pool_size value: %s" %
2294                                    str(err), errors.ECODE_INVAL)
2295       if self.op.candidate_pool_size < 1:
2296         raise errors.OpPrereqError("At least one master candidate needed",
2297                                    errors.ECODE_INVAL)
2298
2299     _CheckBooleanOpField(self.op, "maintain_node_health")
2300
2301     if self.op.uid_pool:
2302       uidpool.CheckUidPool(self.op.uid_pool)
2303
2304     if self.op.add_uids:
2305       uidpool.CheckUidPool(self.op.add_uids)
2306
2307     if self.op.remove_uids:
2308       uidpool.CheckUidPool(self.op.remove_uids)
2309
2310   def ExpandNames(self):
2311     # FIXME: in the future maybe other cluster params won't require checking on
2312     # all nodes to be modified.
2313     self.needed_locks = {
2314       locking.LEVEL_NODE: locking.ALL_SET,
2315     }
2316     self.share_locks[locking.LEVEL_NODE] = 1
2317
2318   def BuildHooksEnv(self):
2319     """Build hooks env.
2320
2321     """
2322     env = {
2323       "OP_TARGET": self.cfg.GetClusterName(),
2324       "NEW_VG_NAME": self.op.vg_name,
2325       }
2326     mn = self.cfg.GetMasterNode()
2327     return env, [mn], [mn]
2328
2329   def CheckPrereq(self):
2330     """Check prerequisites.
2331
2332     This checks whether the given params don't conflict and
2333     if the given volume group is valid.
2334
2335     """
2336     if self.op.vg_name is not None and not self.op.vg_name:
2337       instances = self.cfg.GetAllInstancesInfo().values()
2338       for inst in instances:
2339         for disk in inst.disks:
2340           if _RecursiveCheckIfLVMBased(disk):
2341             raise errors.OpPrereqError("Cannot disable lvm storage while"
2342                                        " lvm-based instances exist",
2343                                        errors.ECODE_INVAL)
2344
2345     node_list = self.acquired_locks[locking.LEVEL_NODE]
2346
2347     # if vg_name not None, checks given volume group on all nodes
2348     if self.op.vg_name:
2349       vglist = self.rpc.call_vg_list(node_list)
2350       for node in node_list:
2351         msg = vglist[node].fail_msg
2352         if msg:
2353           # ignoring down node
2354           self.LogWarning("Error while gathering data on node %s"
2355                           " (ignoring node): %s", node, msg)
2356           continue
2357         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2358                                               self.op.vg_name,
2359                                               constants.MIN_VG_SIZE)
2360         if vgstatus:
2361           raise errors.OpPrereqError("Error on node '%s': %s" %
2362                                      (node, vgstatus), errors.ECODE_ENVIRON)
2363
2364     self.cluster = cluster = self.cfg.GetClusterInfo()
2365     # validate params changes
2366     if self.op.beparams:
2367       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2368       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2369
2370     if self.op.nicparams:
2371       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2372       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2373       objects.NIC.CheckParameterSyntax(self.new_nicparams)
2374       nic_errors = []
2375
2376       # check all instances for consistency
2377       for instance in self.cfg.GetAllInstancesInfo().values():
2378         for nic_idx, nic in enumerate(instance.nics):
2379           params_copy = copy.deepcopy(nic.nicparams)
2380           params_filled = objects.FillDict(self.new_nicparams, params_copy)
2381
2382           # check parameter syntax
2383           try:
2384             objects.NIC.CheckParameterSyntax(params_filled)
2385           except errors.ConfigurationError, err:
2386             nic_errors.append("Instance %s, nic/%d: %s" %
2387                               (instance.name, nic_idx, err))
2388
2389           # if we're moving instances to routed, check that they have an ip
2390           target_mode = params_filled[constants.NIC_MODE]
2391           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2392             nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2393                               (instance.name, nic_idx))
2394       if nic_errors:
2395         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2396                                    "\n".join(nic_errors))
2397
2398     # hypervisor list/parameters
2399     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2400     if self.op.hvparams:
2401       if not isinstance(self.op.hvparams, dict):
2402         raise errors.OpPrereqError("Invalid 'hvparams' parameter on input",
2403                                    errors.ECODE_INVAL)
2404       for hv_name, hv_dict in self.op.hvparams.items():
2405         if hv_name not in self.new_hvparams:
2406           self.new_hvparams[hv_name] = hv_dict
2407         else:
2408           self.new_hvparams[hv_name].update(hv_dict)
2409
2410     # os hypervisor parameters
2411     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2412     if self.op.os_hvp:
2413       if not isinstance(self.op.os_hvp, dict):
2414         raise errors.OpPrereqError("Invalid 'os_hvp' parameter on input",
2415                                    errors.ECODE_INVAL)
2416       for os_name, hvs in self.op.os_hvp.items():
2417         if not isinstance(hvs, dict):
2418           raise errors.OpPrereqError(("Invalid 'os_hvp' parameter on"
2419                                       " input"), errors.ECODE_INVAL)
2420         if os_name not in self.new_os_hvp:
2421           self.new_os_hvp[os_name] = hvs
2422         else:
2423           for hv_name, hv_dict in hvs.items():
2424             if hv_name not in self.new_os_hvp[os_name]:
2425               self.new_os_hvp[os_name][hv_name] = hv_dict
2426             else:
2427               self.new_os_hvp[os_name][hv_name].update(hv_dict)
2428
2429     # os parameters
2430     self.new_osp = objects.FillDict(cluster.osparams, {})
2431     if self.op.osparams:
2432       if not isinstance(self.op.osparams, dict):
2433         raise errors.OpPrereqError("Invalid 'osparams' parameter on input",
2434                                    errors.ECODE_INVAL)
2435       for os_name, osp in self.op.osparams.items():
2436         if not isinstance(osp, dict):
2437           raise errors.OpPrereqError(("Invalid 'osparams' parameter on"
2438                                       " input"), errors.ECODE_INVAL)
2439         if os_name not in self.new_osp:
2440           self.new_osp[os_name] = {}
2441
2442         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
2443                                                   use_none=True)
2444
2445         if not self.new_osp[os_name]:
2446           # we removed all parameters
2447           del self.new_osp[os_name]
2448         else:
2449           # check the parameter validity (remote check)
2450           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
2451                          os_name, self.new_osp[os_name])
2452
2453     # changes to the hypervisor list
2454     if self.op.enabled_hypervisors is not None:
2455       self.hv_list = self.op.enabled_hypervisors
2456       if not self.hv_list:
2457         raise errors.OpPrereqError("Enabled hypervisors list must contain at"
2458                                    " least one member",
2459                                    errors.ECODE_INVAL)
2460       invalid_hvs = set(self.hv_list) - constants.HYPER_TYPES
2461       if invalid_hvs:
2462         raise errors.OpPrereqError("Enabled hypervisors contains invalid"
2463                                    " entries: %s" %
2464                                    utils.CommaJoin(invalid_hvs),
2465                                    errors.ECODE_INVAL)
2466       for hv in self.hv_list:
2467         # if the hypervisor doesn't already exist in the cluster
2468         # hvparams, we initialize it to empty, and then (in both
2469         # cases) we make sure to fill the defaults, as we might not
2470         # have a complete defaults list if the hypervisor wasn't
2471         # enabled before
2472         if hv not in new_hvp:
2473           new_hvp[hv] = {}
2474         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
2475         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
2476     else:
2477       self.hv_list = cluster.enabled_hypervisors
2478
2479     if self.op.hvparams or self.op.enabled_hypervisors is not None:
2480       # either the enabled list has changed, or the parameters have, validate
2481       for hv_name, hv_params in self.new_hvparams.items():
2482         if ((self.op.hvparams and hv_name in self.op.hvparams) or
2483             (self.op.enabled_hypervisors and
2484              hv_name in self.op.enabled_hypervisors)):
2485           # either this is a new hypervisor, or its parameters have changed
2486           hv_class = hypervisor.GetHypervisor(hv_name)
2487           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2488           hv_class.CheckParameterSyntax(hv_params)
2489           _CheckHVParams(self, node_list, hv_name, hv_params)
2490
2491     if self.op.os_hvp:
2492       # no need to check any newly-enabled hypervisors, since the
2493       # defaults have already been checked in the above code-block
2494       for os_name, os_hvp in self.new_os_hvp.items():
2495         for hv_name, hv_params in os_hvp.items():
2496           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2497           # we need to fill in the new os_hvp on top of the actual hv_p
2498           cluster_defaults = self.new_hvparams.get(hv_name, {})
2499           new_osp = objects.FillDict(cluster_defaults, hv_params)
2500           hv_class = hypervisor.GetHypervisor(hv_name)
2501           hv_class.CheckParameterSyntax(new_osp)
2502           _CheckHVParams(self, node_list, hv_name, new_osp)
2503
2504
2505   def Exec(self, feedback_fn):
2506     """Change the parameters of the cluster.
2507
2508     """
2509     if self.op.vg_name is not None:
2510       new_volume = self.op.vg_name
2511       if not new_volume:
2512         new_volume = None
2513       if new_volume != self.cfg.GetVGName():
2514         self.cfg.SetVGName(new_volume)
2515       else:
2516         feedback_fn("Cluster LVM configuration already in desired"
2517                     " state, not changing")
2518     if self.op.hvparams:
2519       self.cluster.hvparams = self.new_hvparams
2520     if self.op.os_hvp:
2521       self.cluster.os_hvp = self.new_os_hvp
2522     if self.op.enabled_hypervisors is not None:
2523       self.cluster.hvparams = self.new_hvparams
2524       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
2525     if self.op.beparams:
2526       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
2527     if self.op.nicparams:
2528       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
2529     if self.op.osparams:
2530       self.cluster.osparams = self.new_osp
2531
2532     if self.op.candidate_pool_size is not None:
2533       self.cluster.candidate_pool_size = self.op.candidate_pool_size
2534       # we need to update the pool size here, otherwise the save will fail
2535       _AdjustCandidatePool(self, [])
2536
2537     if self.op.maintain_node_health is not None:
2538       self.cluster.maintain_node_health = self.op.maintain_node_health
2539
2540     if self.op.add_uids is not None:
2541       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
2542
2543     if self.op.remove_uids is not None:
2544       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
2545
2546     if self.op.uid_pool is not None:
2547       self.cluster.uid_pool = self.op.uid_pool
2548
2549     self.cfg.Update(self.cluster, feedback_fn)
2550
2551
2552 def _RedistributeAncillaryFiles(lu, additional_nodes=None):
2553   """Distribute additional files which are part of the cluster configuration.
2554
2555   ConfigWriter takes care of distributing the config and ssconf files, but
2556   there are more files which should be distributed to all nodes. This function
2557   makes sure those are copied.
2558
2559   @param lu: calling logical unit
2560   @param additional_nodes: list of nodes not in the config to distribute to
2561
2562   """
2563   # 1. Gather target nodes
2564   myself = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
2565   dist_nodes = lu.cfg.GetOnlineNodeList()
2566   if additional_nodes is not None:
2567     dist_nodes.extend(additional_nodes)
2568   if myself.name in dist_nodes:
2569     dist_nodes.remove(myself.name)
2570
2571   # 2. Gather files to distribute
2572   dist_files = set([constants.ETC_HOSTS,
2573                     constants.SSH_KNOWN_HOSTS_FILE,
2574                     constants.RAPI_CERT_FILE,
2575                     constants.RAPI_USERS_FILE,
2576                     constants.CONFD_HMAC_KEY,
2577                     constants.CLUSTER_DOMAIN_SECRET_FILE,
2578                    ])
2579
2580   enabled_hypervisors = lu.cfg.GetClusterInfo().enabled_hypervisors
2581   for hv_name in enabled_hypervisors:
2582     hv_class = hypervisor.GetHypervisor(hv_name)
2583     dist_files.update(hv_class.GetAncillaryFiles())
2584
2585   # 3. Perform the files upload
2586   for fname in dist_files:
2587     if os.path.exists(fname):
2588       result = lu.rpc.call_upload_file(dist_nodes, fname)
2589       for to_node, to_result in result.items():
2590         msg = to_result.fail_msg
2591         if msg:
2592           msg = ("Copy of file %s to node %s failed: %s" %
2593                  (fname, to_node, msg))
2594           lu.proc.LogWarning(msg)
2595
2596
2597 class LURedistributeConfig(NoHooksLU):
2598   """Force the redistribution of cluster configuration.
2599
2600   This is a very simple LU.
2601
2602   """
2603   _OP_REQP = []
2604   REQ_BGL = False
2605
2606   def ExpandNames(self):
2607     self.needed_locks = {
2608       locking.LEVEL_NODE: locking.ALL_SET,
2609     }
2610     self.share_locks[locking.LEVEL_NODE] = 1
2611
2612   def CheckPrereq(self):
2613     """Check prerequisites.
2614
2615     """
2616
2617   def Exec(self, feedback_fn):
2618     """Redistribute the configuration.
2619
2620     """
2621     self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
2622     _RedistributeAncillaryFiles(self)
2623
2624
2625 def _WaitForSync(lu, instance, disks=None, oneshot=False):
2626   """Sleep and poll for an instance's disk to sync.
2627
2628   """
2629   if not instance.disks or disks is not None and not disks:
2630     return True
2631
2632   disks = _ExpandCheckDisks(instance, disks)
2633
2634   if not oneshot:
2635     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
2636
2637   node = instance.primary_node
2638
2639   for dev in disks:
2640     lu.cfg.SetDiskID(dev, node)
2641
2642   # TODO: Convert to utils.Retry
2643
2644   retries = 0
2645   degr_retries = 10 # in seconds, as we sleep 1 second each time
2646   while True:
2647     max_time = 0
2648     done = True
2649     cumul_degraded = False
2650     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
2651     msg = rstats.fail_msg
2652     if msg:
2653       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
2654       retries += 1
2655       if retries >= 10:
2656         raise errors.RemoteError("Can't contact node %s for mirror data,"
2657                                  " aborting." % node)
2658       time.sleep(6)
2659       continue
2660     rstats = rstats.payload
2661     retries = 0
2662     for i, mstat in enumerate(rstats):
2663       if mstat is None:
2664         lu.LogWarning("Can't compute data for node %s/%s",
2665                            node, disks[i].iv_name)
2666         continue
2667
2668       cumul_degraded = (cumul_degraded or
2669                         (mstat.is_degraded and mstat.sync_percent is None))
2670       if mstat.sync_percent is not None:
2671         done = False
2672         if mstat.estimated_time is not None:
2673           rem_time = ("%s remaining (estimated)" %
2674                       utils.FormatSeconds(mstat.estimated_time))
2675           max_time = mstat.estimated_time
2676         else:
2677           rem_time = "no time estimate"
2678         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
2679                         (disks[i].iv_name, mstat.sync_percent, rem_time))
2680
2681     # if we're done but degraded, let's do a few small retries, to
2682     # make sure we see a stable and not transient situation; therefore
2683     # we force restart of the loop
2684     if (done or oneshot) and cumul_degraded and degr_retries > 0:
2685       logging.info("Degraded disks found, %d retries left", degr_retries)
2686       degr_retries -= 1
2687       time.sleep(1)
2688       continue
2689
2690     if done or oneshot:
2691       break
2692
2693     time.sleep(min(60, max_time))
2694
2695   if done:
2696     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
2697   return not cumul_degraded
2698
2699
2700 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
2701   """Check that mirrors are not degraded.
2702
2703   The ldisk parameter, if True, will change the test from the
2704   is_degraded attribute (which represents overall non-ok status for
2705   the device(s)) to the ldisk (representing the local storage status).
2706
2707   """
2708   lu.cfg.SetDiskID(dev, node)
2709
2710   result = True
2711
2712   if on_primary or dev.AssembleOnSecondary():
2713     rstats = lu.rpc.call_blockdev_find(node, dev)
2714     msg = rstats.fail_msg
2715     if msg:
2716       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
2717       result = False
2718     elif not rstats.payload:
2719       lu.LogWarning("Can't find disk on node %s", node)
2720       result = False
2721     else:
2722       if ldisk:
2723         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
2724       else:
2725         result = result and not rstats.payload.is_degraded
2726
2727   if dev.children:
2728     for child in dev.children:
2729       result = result and _CheckDiskConsistency(lu, child, node, on_primary)
2730
2731   return result
2732
2733
2734 class LUDiagnoseOS(NoHooksLU):
2735   """Logical unit for OS diagnose/query.
2736
2737   """
2738   _OP_REQP = ["output_fields", "names"]
2739   REQ_BGL = False
2740   _FIELDS_STATIC = utils.FieldSet()
2741   _FIELDS_DYNAMIC = utils.FieldSet("name", "valid", "node_status", "variants",
2742                                    "parameters", "api_versions")
2743
2744   def ExpandNames(self):
2745     if self.op.names:
2746       raise errors.OpPrereqError("Selective OS query not supported",
2747                                  errors.ECODE_INVAL)
2748
2749     _CheckOutputFields(static=self._FIELDS_STATIC,
2750                        dynamic=self._FIELDS_DYNAMIC,
2751                        selected=self.op.output_fields)
2752
2753     # Lock all nodes, in shared mode
2754     # Temporary removal of locks, should be reverted later
2755     # TODO: reintroduce locks when they are lighter-weight
2756     self.needed_locks = {}
2757     #self.share_locks[locking.LEVEL_NODE] = 1
2758     #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
2759
2760   def CheckPrereq(self):
2761     """Check prerequisites.
2762
2763     """
2764
2765   @staticmethod
2766   def _DiagnoseByOS(rlist):
2767     """Remaps a per-node return list into an a per-os per-node dictionary
2768
2769     @param rlist: a map with node names as keys and OS objects as values
2770
2771     @rtype: dict
2772     @return: a dictionary with osnames as keys and as value another
2773         map, with nodes as keys and tuples of (path, status, diagnose,
2774         variants, parameters, api_versions) as values, eg::
2775
2776           {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
2777                                      (/srv/..., False, "invalid api")],
2778                            "node2": [(/srv/..., True, "", [], [])]}
2779           }
2780
2781     """
2782     all_os = {}
2783     # we build here the list of nodes that didn't fail the RPC (at RPC
2784     # level), so that nodes with a non-responding node daemon don't
2785     # make all OSes invalid
2786     good_nodes = [node_name for node_name in rlist
2787                   if not rlist[node_name].fail_msg]
2788     for node_name, nr in rlist.items():
2789       if nr.fail_msg or not nr.payload:
2790         continue
2791       for (name, path, status, diagnose, variants,
2792            params, api_versions) in nr.payload:
2793         if name not in all_os:
2794           # build a list of nodes for this os containing empty lists
2795           # for each node in node_list
2796           all_os[name] = {}
2797           for nname in good_nodes:
2798             all_os[name][nname] = []
2799         # convert params from [name, help] to (name, help)
2800         params = [tuple(v) for v in params]
2801         all_os[name][node_name].append((path, status, diagnose,
2802                                         variants, params, api_versions))
2803     return all_os
2804
2805   def Exec(self, feedback_fn):
2806     """Compute the list of OSes.
2807
2808     """
2809     valid_nodes = [node for node in self.cfg.GetOnlineNodeList()]
2810     node_data = self.rpc.call_os_diagnose(valid_nodes)
2811     pol = self._DiagnoseByOS(node_data)
2812     output = []
2813
2814     for os_name, os_data in pol.items():
2815       row = []
2816       valid = True
2817       (variants, params, api_versions) = null_state = (set(), set(), set())
2818       for idx, osl in enumerate(os_data.values()):
2819         valid = bool(valid and osl and osl[0][1])
2820         if not valid:
2821           (variants, params, api_versions) = null_state
2822           break
2823         node_variants, node_params, node_api = osl[0][3:6]
2824         if idx == 0: # first entry
2825           variants = set(node_variants)
2826           params = set(node_params)
2827           api_versions = set(node_api)
2828         else: # keep consistency
2829           variants.intersection_update(node_variants)
2830           params.intersection_update(node_params)
2831           api_versions.intersection_update(node_api)
2832
2833       for field in self.op.output_fields:
2834         if field == "name":
2835           val = os_name
2836         elif field == "valid":
2837           val = valid
2838         elif field == "node_status":
2839           # this is just a copy of the dict
2840           val = {}
2841           for node_name, nos_list in os_data.items():
2842             val[node_name] = nos_list
2843         elif field == "variants":
2844           val = list(variants)
2845         elif field == "parameters":
2846           val = list(params)
2847         elif field == "api_versions":
2848           val = list(api_versions)
2849         else:
2850           raise errors.ParameterError(field)
2851         row.append(val)
2852       output.append(row)
2853
2854     return output
2855
2856
2857 class LURemoveNode(LogicalUnit):
2858   """Logical unit for removing a node.
2859
2860   """
2861   HPATH = "node-remove"
2862   HTYPE = constants.HTYPE_NODE
2863   _OP_REQP = ["node_name"]
2864
2865   def BuildHooksEnv(self):
2866     """Build hooks env.
2867
2868     This doesn't run on the target node in the pre phase as a failed
2869     node would then be impossible to remove.
2870
2871     """
2872     env = {
2873       "OP_TARGET": self.op.node_name,
2874       "NODE_NAME": self.op.node_name,
2875       }
2876     all_nodes = self.cfg.GetNodeList()
2877     try:
2878       all_nodes.remove(self.op.node_name)
2879     except ValueError:
2880       logging.warning("Node %s which is about to be removed not found"
2881                       " in the all nodes list", self.op.node_name)
2882     return env, all_nodes, all_nodes
2883
2884   def CheckPrereq(self):
2885     """Check prerequisites.
2886
2887     This checks:
2888      - the node exists in the configuration
2889      - it does not have primary or secondary instances
2890      - it's not the master
2891
2892     Any errors are signaled by raising errors.OpPrereqError.
2893
2894     """
2895     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
2896     node = self.cfg.GetNodeInfo(self.op.node_name)
2897     assert node is not None
2898
2899     instance_list = self.cfg.GetInstanceList()
2900
2901     masternode = self.cfg.GetMasterNode()
2902     if node.name == masternode:
2903       raise errors.OpPrereqError("Node is the master node,"
2904                                  " you need to failover first.",
2905                                  errors.ECODE_INVAL)
2906
2907     for instance_name in instance_list:
2908       instance = self.cfg.GetInstanceInfo(instance_name)
2909       if node.name in instance.all_nodes:
2910         raise errors.OpPrereqError("Instance %s is still running on the node,"
2911                                    " please remove first." % instance_name,
2912                                    errors.ECODE_INVAL)
2913     self.op.node_name = node.name
2914     self.node = node
2915
2916   def Exec(self, feedback_fn):
2917     """Removes the node from the cluster.
2918
2919     """
2920     node = self.node
2921     logging.info("Stopping the node daemon and removing configs from node %s",
2922                  node.name)
2923
2924     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
2925
2926     # Promote nodes to master candidate as needed
2927     _AdjustCandidatePool(self, exceptions=[node.name])
2928     self.context.RemoveNode(node.name)
2929
2930     # Run post hooks on the node before it's removed
2931     hm = self.proc.hmclass(self.rpc.call_hooks_runner, self)
2932     try:
2933       hm.RunPhase(constants.HOOKS_PHASE_POST, [node.name])
2934     except:
2935       # pylint: disable-msg=W0702
2936       self.LogWarning("Errors occurred running hooks on %s" % node.name)
2937
2938     result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
2939     msg = result.fail_msg
2940     if msg:
2941       self.LogWarning("Errors encountered on the remote node while leaving"
2942                       " the cluster: %s", msg)
2943
2944     # Remove node from our /etc/hosts
2945     if self.cfg.GetClusterInfo().modify_etc_hosts:
2946       # FIXME: this should be done via an rpc call to node daemon
2947       utils.RemoveHostFromEtcHosts(node.name)
2948       _RedistributeAncillaryFiles(self)
2949
2950
2951 class LUQueryNodes(NoHooksLU):
2952   """Logical unit for querying nodes.
2953
2954   """
2955   # pylint: disable-msg=W0142
2956   _OP_REQP = ["output_fields", "names", "use_locking"]
2957   REQ_BGL = False
2958
2959   _SIMPLE_FIELDS = ["name", "serial_no", "ctime", "mtime", "uuid",
2960                     "master_candidate", "offline", "drained"]
2961
2962   _FIELDS_DYNAMIC = utils.FieldSet(
2963     "dtotal", "dfree",
2964     "mtotal", "mnode", "mfree",
2965     "bootid",
2966     "ctotal", "cnodes", "csockets",
2967     )
2968
2969   _FIELDS_STATIC = utils.FieldSet(*[
2970     "pinst_cnt", "sinst_cnt",
2971     "pinst_list", "sinst_list",
2972     "pip", "sip", "tags",
2973     "master",
2974     "role"] + _SIMPLE_FIELDS
2975     )
2976
2977   def ExpandNames(self):
2978     _CheckOutputFields(static=self._FIELDS_STATIC,
2979                        dynamic=self._FIELDS_DYNAMIC,
2980                        selected=self.op.output_fields)
2981
2982     self.needed_locks = {}
2983     self.share_locks[locking.LEVEL_NODE] = 1
2984
2985     if self.op.names:
2986       self.wanted = _GetWantedNodes(self, self.op.names)
2987     else:
2988       self.wanted = locking.ALL_SET
2989
2990     self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
2991     self.do_locking = self.do_node_query and self.op.use_locking
2992     if self.do_locking:
2993       # if we don't request only static fields, we need to lock the nodes
2994       self.needed_locks[locking.LEVEL_NODE] = self.wanted
2995
2996   def CheckPrereq(self):
2997     """Check prerequisites.
2998
2999     """
3000     # The validation of the node list is done in the _GetWantedNodes,
3001     # if non empty, and if empty, there's no validation to do
3002     pass
3003
3004   def Exec(self, feedback_fn):
3005     """Computes the list of nodes and their attributes.
3006
3007     """
3008     all_info = self.cfg.GetAllNodesInfo()
3009     if self.do_locking:
3010       nodenames = self.acquired_locks[locking.LEVEL_NODE]
3011     elif self.wanted != locking.ALL_SET:
3012       nodenames = self.wanted
3013       missing = set(nodenames).difference(all_info.keys())
3014       if missing:
3015         raise errors.OpExecError(
3016           "Some nodes were removed before retrieving their data: %s" % missing)
3017     else:
3018       nodenames = all_info.keys()
3019
3020     nodenames = utils.NiceSort(nodenames)
3021     nodelist = [all_info[name] for name in nodenames]
3022
3023     # begin data gathering
3024
3025     if self.do_node_query:
3026       live_data = {}
3027       node_data = self.rpc.call_node_info(nodenames, self.cfg.GetVGName(),
3028                                           self.cfg.GetHypervisorType())
3029       for name in nodenames:
3030         nodeinfo = node_data[name]
3031         if not nodeinfo.fail_msg and nodeinfo.payload:
3032           nodeinfo = nodeinfo.payload
3033           fn = utils.TryConvert
3034           live_data[name] = {
3035             "mtotal": fn(int, nodeinfo.get('memory_total', None)),
3036             "mnode": fn(int, nodeinfo.get('memory_dom0', None)),
3037             "mfree": fn(int, nodeinfo.get('memory_free', None)),
3038             "dtotal": fn(int, nodeinfo.get('vg_size', None)),
3039             "dfree": fn(int, nodeinfo.get('vg_free', None)),
3040             "ctotal": fn(int, nodeinfo.get('cpu_total', None)),
3041             "bootid": nodeinfo.get('bootid', None),
3042             "cnodes": fn(int, nodeinfo.get('cpu_nodes', None)),
3043             "csockets": fn(int, nodeinfo.get('cpu_sockets', None)),
3044             }
3045         else:
3046           live_data[name] = {}
3047     else:
3048       live_data = dict.fromkeys(nodenames, {})
3049
3050     node_to_primary = dict([(name, set()) for name in nodenames])
3051     node_to_secondary = dict([(name, set()) for name in nodenames])
3052
3053     inst_fields = frozenset(("pinst_cnt", "pinst_list",
3054                              "sinst_cnt", "sinst_list"))
3055     if inst_fields & frozenset(self.op.output_fields):
3056       inst_data = self.cfg.GetAllInstancesInfo()
3057
3058       for inst in inst_data.values():
3059         if inst.primary_node in node_to_primary:
3060           node_to_primary[inst.primary_node].add(inst.name)
3061         for secnode in inst.secondary_nodes:
3062           if secnode in node_to_secondary:
3063             node_to_secondary[secnode].add(inst.name)
3064
3065     master_node = self.cfg.GetMasterNode()
3066
3067     # end data gathering
3068
3069     output = []
3070     for node in nodelist:
3071       node_output = []
3072       for field in self.op.output_fields:
3073         if field in self._SIMPLE_FIELDS:
3074           val = getattr(node, field)
3075         elif field == "pinst_list":
3076           val = list(node_to_primary[node.name])
3077         elif field == "sinst_list":
3078           val = list(node_to_secondary[node.name])
3079         elif field == "pinst_cnt":
3080           val = len(node_to_primary[node.name])
3081         elif field == "sinst_cnt":
3082           val = len(node_to_secondary[node.name])
3083         elif field == "pip":
3084           val = node.primary_ip
3085         elif field == "sip":
3086           val = node.secondary_ip
3087         elif field == "tags":
3088           val = list(node.GetTags())
3089         elif field == "master":
3090           val = node.name == master_node
3091         elif self._FIELDS_DYNAMIC.Matches(field):
3092           val = live_data[node.name].get(field, None)
3093         elif field == "role":
3094           if node.name == master_node:
3095             val = "M"
3096           elif node.master_candidate:
3097             val = "C"
3098           elif node.drained:
3099             val = "D"
3100           elif node.offline:
3101             val = "O"
3102           else:
3103             val = "R"
3104         else:
3105           raise errors.ParameterError(field)
3106         node_output.append(val)
3107       output.append(node_output)
3108
3109     return output
3110
3111
3112 class LUQueryNodeVolumes(NoHooksLU):
3113   """Logical unit for getting volumes on node(s).
3114
3115   """
3116   _OP_REQP = ["nodes", "output_fields"]
3117   REQ_BGL = False
3118   _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3119   _FIELDS_STATIC = utils.FieldSet("node")
3120
3121   def ExpandNames(self):
3122     _CheckOutputFields(static=self._FIELDS_STATIC,
3123                        dynamic=self._FIELDS_DYNAMIC,
3124                        selected=self.op.output_fields)
3125
3126     self.needed_locks = {}
3127     self.share_locks[locking.LEVEL_NODE] = 1
3128     if not self.op.nodes:
3129       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3130     else:
3131       self.needed_locks[locking.LEVEL_NODE] = \
3132         _GetWantedNodes(self, self.op.nodes)
3133
3134   def CheckPrereq(self):
3135     """Check prerequisites.
3136
3137     This checks that the fields required are valid output fields.
3138
3139     """
3140     self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3141
3142   def Exec(self, feedback_fn):
3143     """Computes the list of nodes and their attributes.
3144
3145     """
3146     nodenames = self.nodes
3147     volumes = self.rpc.call_node_volumes(nodenames)
3148
3149     ilist = [self.cfg.GetInstanceInfo(iname) for iname
3150              in self.cfg.GetInstanceList()]
3151
3152     lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3153
3154     output = []
3155     for node in nodenames:
3156       nresult = volumes[node]
3157       if nresult.offline:
3158         continue
3159       msg = nresult.fail_msg
3160       if msg:
3161         self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3162         continue
3163
3164       node_vols = nresult.payload[:]
3165       node_vols.sort(key=lambda vol: vol['dev'])
3166
3167       for vol in node_vols:
3168         node_output = []
3169         for field in self.op.output_fields:
3170           if field == "node":
3171             val = node
3172           elif field == "phys":
3173             val = vol['dev']
3174           elif field == "vg":
3175             val = vol['vg']
3176           elif field == "name":
3177             val = vol['name']
3178           elif field == "size":
3179             val = int(float(vol['size']))
3180           elif field == "instance":
3181             for inst in ilist:
3182               if node not in lv_by_node[inst]:
3183                 continue
3184               if vol['name'] in lv_by_node[inst][node]:
3185                 val = inst.name
3186                 break
3187             else:
3188               val = '-'
3189           else:
3190             raise errors.ParameterError(field)
3191           node_output.append(str(val))
3192
3193         output.append(node_output)
3194
3195     return output
3196
3197
3198 class LUQueryNodeStorage(NoHooksLU):
3199   """Logical unit for getting information on storage units on node(s).
3200
3201   """
3202   _OP_REQP = ["nodes", "storage_type", "output_fields"]
3203   REQ_BGL = False
3204   _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
3205
3206   def CheckArguments(self):
3207     _CheckStorageType(self.op.storage_type)
3208
3209     _CheckOutputFields(static=self._FIELDS_STATIC,
3210                        dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
3211                        selected=self.op.output_fields)
3212
3213   def ExpandNames(self):
3214     self.needed_locks = {}
3215     self.share_locks[locking.LEVEL_NODE] = 1
3216
3217     if self.op.nodes:
3218       self.needed_locks[locking.LEVEL_NODE] = \
3219         _GetWantedNodes(self, self.op.nodes)
3220     else:
3221       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3222
3223   def CheckPrereq(self):
3224     """Check prerequisites.
3225
3226     This checks that the fields required are valid output fields.
3227
3228     """
3229     self.op.name = getattr(self.op, "name", None)
3230
3231     self.nodes = self.acquired_locks[locking.LEVEL_NODE]
3232
3233   def Exec(self, feedback_fn):
3234     """Computes the list of nodes and their attributes.
3235
3236     """
3237     # Always get name to sort by
3238     if constants.SF_NAME in self.op.output_fields:
3239       fields = self.op.output_fields[:]
3240     else:
3241       fields = [constants.SF_NAME] + self.op.output_fields
3242
3243     # Never ask for node or type as it's only known to the LU
3244     for extra in [constants.SF_NODE, constants.SF_TYPE]:
3245       while extra in fields:
3246         fields.remove(extra)
3247
3248     field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
3249     name_idx = field_idx[constants.SF_NAME]
3250
3251     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3252     data = self.rpc.call_storage_list(self.nodes,
3253                                       self.op.storage_type, st_args,
3254                                       self.op.name, fields)
3255
3256     result = []
3257
3258     for node in utils.NiceSort(self.nodes):
3259       nresult = data[node]
3260       if nresult.offline:
3261         continue
3262
3263       msg = nresult.fail_msg
3264       if msg:
3265         self.LogWarning("Can't get storage data from node %s: %s", node, msg)
3266         continue
3267
3268       rows = dict([(row[name_idx], row) for row in nresult.payload])
3269
3270       for name in utils.NiceSort(rows.keys()):
3271         row = rows[name]
3272
3273         out = []
3274
3275         for field in self.op.output_fields:
3276           if field == constants.SF_NODE:
3277             val = node
3278           elif field == constants.SF_TYPE:
3279             val = self.op.storage_type
3280           elif field in field_idx:
3281             val = row[field_idx[field]]
3282           else:
3283             raise errors.ParameterError(field)
3284
3285           out.append(val)
3286
3287         result.append(out)
3288
3289     return result
3290
3291
3292 class LUModifyNodeStorage(NoHooksLU):
3293   """Logical unit for modifying a storage volume on a node.
3294
3295   """
3296   _OP_REQP = ["node_name", "storage_type", "name", "changes"]
3297   REQ_BGL = False
3298
3299   def CheckArguments(self):
3300     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3301
3302     _CheckStorageType(self.op.storage_type)
3303
3304   def ExpandNames(self):
3305     self.needed_locks = {
3306       locking.LEVEL_NODE: self.op.node_name,
3307       }
3308
3309   def CheckPrereq(self):
3310     """Check prerequisites.
3311
3312     """
3313     storage_type = self.op.storage_type
3314
3315     try:
3316       modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
3317     except KeyError:
3318       raise errors.OpPrereqError("Storage units of type '%s' can not be"
3319                                  " modified" % storage_type,
3320                                  errors.ECODE_INVAL)
3321
3322     diff = set(self.op.changes.keys()) - modifiable
3323     if diff:
3324       raise errors.OpPrereqError("The following fields can not be modified for"
3325                                  " storage units of type '%s': %r" %
3326                                  (storage_type, list(diff)),
3327                                  errors.ECODE_INVAL)
3328
3329   def Exec(self, feedback_fn):
3330     """Computes the list of nodes and their attributes.
3331
3332     """
3333     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
3334     result = self.rpc.call_storage_modify(self.op.node_name,
3335                                           self.op.storage_type, st_args,
3336                                           self.op.name, self.op.changes)
3337     result.Raise("Failed to modify storage unit '%s' on %s" %
3338                  (self.op.name, self.op.node_name))
3339
3340
3341 class LUAddNode(LogicalUnit):
3342   """Logical unit for adding node to the cluster.
3343
3344   """
3345   HPATH = "node-add"
3346   HTYPE = constants.HTYPE_NODE
3347   _OP_REQP = ["node_name"]
3348
3349   def CheckArguments(self):
3350     # validate/normalize the node name
3351     self.op.node_name = utils.HostInfo.NormalizeName(self.op.node_name)
3352
3353   def BuildHooksEnv(self):
3354     """Build hooks env.
3355
3356     This will run on all nodes before, and on all nodes + the new node after.
3357
3358     """
3359     env = {
3360       "OP_TARGET": self.op.node_name,
3361       "NODE_NAME": self.op.node_name,
3362       "NODE_PIP": self.op.primary_ip,
3363       "NODE_SIP": self.op.secondary_ip,
3364       }
3365     nodes_0 = self.cfg.GetNodeList()
3366     nodes_1 = nodes_0 + [self.op.node_name, ]
3367     return env, nodes_0, nodes_1
3368
3369   def CheckPrereq(self):
3370     """Check prerequisites.
3371
3372     This checks:
3373      - the new node is not already in the config
3374      - it is resolvable
3375      - its parameters (single/dual homed) matches the cluster
3376
3377     Any errors are signaled by raising errors.OpPrereqError.
3378
3379     """
3380     node_name = self.op.node_name
3381     cfg = self.cfg
3382
3383     dns_data = utils.GetHostInfo(node_name)
3384
3385     node = dns_data.name
3386     primary_ip = self.op.primary_ip = dns_data.ip
3387     secondary_ip = getattr(self.op, "secondary_ip", None)
3388     if secondary_ip is None:
3389       secondary_ip = primary_ip
3390     if not utils.IsValidIP(secondary_ip):
3391       raise errors.OpPrereqError("Invalid secondary IP given",
3392                                  errors.ECODE_INVAL)
3393     self.op.secondary_ip = secondary_ip
3394
3395     node_list = cfg.GetNodeList()
3396     if not self.op.readd and node in node_list:
3397       raise errors.OpPrereqError("Node %s is already in the configuration" %
3398                                  node, errors.ECODE_EXISTS)
3399     elif self.op.readd and node not in node_list:
3400       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
3401                                  errors.ECODE_NOENT)
3402
3403     self.changed_primary_ip = False
3404
3405     for existing_node_name in node_list:
3406       existing_node = cfg.GetNodeInfo(existing_node_name)
3407
3408       if self.op.readd and node == existing_node_name:
3409         if existing_node.secondary_ip != secondary_ip:
3410           raise errors.OpPrereqError("Readded node doesn't have the same IP"
3411                                      " address configuration as before",
3412                                      errors.ECODE_INVAL)
3413         if existing_node.primary_ip != primary_ip:
3414           self.changed_primary_ip = True
3415
3416         continue
3417
3418       if (existing_node.primary_ip == primary_ip or
3419           existing_node.secondary_ip == primary_ip or
3420           existing_node.primary_ip == secondary_ip or
3421           existing_node.secondary_ip == secondary_ip):
3422         raise errors.OpPrereqError("New node ip address(es) conflict with"
3423                                    " existing node %s" % existing_node.name,
3424                                    errors.ECODE_NOTUNIQUE)
3425
3426     # check that the type of the node (single versus dual homed) is the
3427     # same as for the master
3428     myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
3429     master_singlehomed = myself.secondary_ip == myself.primary_ip
3430     newbie_singlehomed = secondary_ip == primary_ip
3431     if master_singlehomed != newbie_singlehomed:
3432       if master_singlehomed:
3433         raise errors.OpPrereqError("The master has no private ip but the"
3434                                    " new node has one",
3435                                    errors.ECODE_INVAL)
3436       else:
3437         raise errors.OpPrereqError("The master has a private ip but the"
3438                                    " new node doesn't have one",
3439                                    errors.ECODE_INVAL)
3440
3441     # checks reachability
3442     if not utils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
3443       raise errors.OpPrereqError("Node not reachable by ping",
3444                                  errors.ECODE_ENVIRON)
3445
3446     if not newbie_singlehomed:
3447       # check reachability from my secondary ip to newbie's secondary ip
3448       if not utils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
3449                            source=myself.secondary_ip):
3450         raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
3451                                    " based ping to noded port",
3452                                    errors.ECODE_ENVIRON)
3453
3454     if self.op.readd:
3455       exceptions = [node]
3456     else:
3457       exceptions = []
3458
3459     self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
3460
3461     if self.op.readd:
3462       self.new_node = self.cfg.GetNodeInfo(node)
3463       assert self.new_node is not None, "Can't retrieve locked node %s" % node
3464     else:
3465       self.new_node = objects.Node(name=node,
3466                                    primary_ip=primary_ip,
3467                                    secondary_ip=secondary_ip,
3468                                    master_candidate=self.master_candidate,
3469                                    offline=False, drained=False)
3470
3471   def Exec(self, feedback_fn):
3472     """Adds the new node to the cluster.
3473
3474     """
3475     new_node = self.new_node
3476     node = new_node.name
3477
3478     # for re-adds, reset the offline/drained/master-candidate flags;
3479     # we need to reset here, otherwise offline would prevent RPC calls
3480     # later in the procedure; this also means that if the re-add
3481     # fails, we are left with a non-offlined, broken node
3482     if self.op.readd:
3483       new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
3484       self.LogInfo("Readding a node, the offline/drained flags were reset")
3485       # if we demote the node, we do cleanup later in the procedure
3486       new_node.master_candidate = self.master_candidate
3487       if self.changed_primary_ip:
3488         new_node.primary_ip = self.op.primary_ip
3489
3490     # notify the user about any possible mc promotion
3491     if new_node.master_candidate:
3492       self.LogInfo("Node will be a master candidate")
3493
3494     # check connectivity
3495     result = self.rpc.call_version([node])[node]
3496     result.Raise("Can't get version information from node %s" % node)
3497     if constants.PROTOCOL_VERSION == result.payload:
3498       logging.info("Communication to node %s fine, sw version %s match",
3499                    node, result.payload)
3500     else:
3501       raise errors.OpExecError("Version mismatch master version %s,"
3502                                " node version %s" %
3503                                (constants.PROTOCOL_VERSION, result.payload))
3504
3505     # setup ssh on node
3506     if self.cfg.GetClusterInfo().modify_ssh_setup:
3507       logging.info("Copy ssh key to node %s", node)
3508       priv_key, pub_key, _ = ssh.GetUserFiles(constants.GANETI_RUNAS)
3509       keyarray = []
3510       keyfiles = [constants.SSH_HOST_DSA_PRIV, constants.SSH_HOST_DSA_PUB,
3511                   constants.SSH_HOST_RSA_PRIV, constants.SSH_HOST_RSA_PUB,
3512                   priv_key, pub_key]
3513
3514       for i in keyfiles:
3515         keyarray.append(utils.ReadFile(i))
3516
3517       result = self.rpc.call_node_add(node, keyarray[0], keyarray[1],
3518                                       keyarray[2], keyarray[3], keyarray[4],
3519                                       keyarray[5])
3520       result.Raise("Cannot transfer ssh keys to the new node")
3521
3522     # Add node to our /etc/hosts, and add key to known_hosts
3523     if self.cfg.GetClusterInfo().modify_etc_hosts:
3524       # FIXME: this should be done via an rpc call to node daemon
3525       utils.AddHostToEtcHosts(new_node.name)
3526
3527     if new_node.secondary_ip != new_node.primary_ip:
3528       result = self.rpc.call_node_has_ip_address(new_node.name,
3529                                                  new_node.secondary_ip)
3530       result.Raise("Failure checking secondary ip on node %s" % new_node.name,
3531                    prereq=True, ecode=errors.ECODE_ENVIRON)
3532       if not result.payload:
3533         raise errors.OpExecError("Node claims it doesn't have the secondary ip"
3534                                  " you gave (%s). Please fix and re-run this"
3535                                  " command." % new_node.secondary_ip)
3536
3537     node_verify_list = [self.cfg.GetMasterNode()]
3538     node_verify_param = {
3539       constants.NV_NODELIST: [node],
3540       # TODO: do a node-net-test as well?
3541     }
3542
3543     result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
3544                                        self.cfg.GetClusterName())
3545     for verifier in node_verify_list:
3546       result[verifier].Raise("Cannot communicate with node %s" % verifier)
3547       nl_payload = result[verifier].payload[constants.NV_NODELIST]
3548       if nl_payload:
3549         for failed in nl_payload:
3550           feedback_fn("ssh/hostname verification failed"
3551                       " (checking from %s): %s" %
3552                       (verifier, nl_payload[failed]))
3553         raise errors.OpExecError("ssh/hostname verification failed.")
3554
3555     if self.op.readd:
3556       _RedistributeAncillaryFiles(self)
3557       self.context.ReaddNode(new_node)
3558       # make sure we redistribute the config
3559       self.cfg.Update(new_node, feedback_fn)
3560       # and make sure the new node will not have old files around
3561       if not new_node.master_candidate:
3562         result = self.rpc.call_node_demote_from_mc(new_node.name)
3563         msg = result.fail_msg
3564         if msg:
3565           self.LogWarning("Node failed to demote itself from master"
3566                           " candidate status: %s" % msg)
3567     else:
3568       _RedistributeAncillaryFiles(self, additional_nodes=[node])
3569       self.context.AddNode(new_node, self.proc.GetECId())
3570
3571
3572 class LUSetNodeParams(LogicalUnit):
3573   """Modifies the parameters of a node.
3574
3575   """
3576   HPATH = "node-modify"
3577   HTYPE = constants.HTYPE_NODE
3578   _OP_REQP = ["node_name"]
3579   REQ_BGL = False
3580
3581   def CheckArguments(self):
3582     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3583     _CheckBooleanOpField(self.op, 'master_candidate')
3584     _CheckBooleanOpField(self.op, 'offline')
3585     _CheckBooleanOpField(self.op, 'drained')
3586     _CheckBooleanOpField(self.op, 'auto_promote')
3587     all_mods = [self.op.offline, self.op.master_candidate, self.op.drained]
3588     if all_mods.count(None) == 3:
3589       raise errors.OpPrereqError("Please pass at least one modification",
3590                                  errors.ECODE_INVAL)
3591     if all_mods.count(True) > 1:
3592       raise errors.OpPrereqError("Can't set the node into more than one"
3593                                  " state at the same time",
3594                                  errors.ECODE_INVAL)
3595
3596     # Boolean value that tells us whether we're offlining or draining the node
3597     self.offline_or_drain = (self.op.offline == True or
3598                              self.op.drained == True)
3599     self.deoffline_or_drain = (self.op.offline == False or
3600                                self.op.drained == False)
3601     self.might_demote = (self.op.master_candidate == False or
3602                          self.offline_or_drain)
3603
3604     self.lock_all = self.op.auto_promote and self.might_demote
3605
3606
3607   def ExpandNames(self):
3608     if self.lock_all:
3609       self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
3610     else:
3611       self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
3612
3613   def BuildHooksEnv(self):
3614     """Build hooks env.
3615
3616     This runs on the master node.
3617
3618     """
3619     env = {
3620       "OP_TARGET": self.op.node_name,
3621       "MASTER_CANDIDATE": str(self.op.master_candidate),
3622       "OFFLINE": str(self.op.offline),
3623       "DRAINED": str(self.op.drained),
3624       }
3625     nl = [self.cfg.GetMasterNode(),
3626           self.op.node_name]
3627     return env, nl, nl
3628
3629   def CheckPrereq(self):
3630     """Check prerequisites.
3631
3632     This only checks the instance list against the existing names.
3633
3634     """
3635     node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
3636
3637     if (self.op.master_candidate is not None or
3638         self.op.drained is not None or
3639         self.op.offline is not None):
3640       # we can't change the master's node flags
3641       if self.op.node_name == self.cfg.GetMasterNode():
3642         raise errors.OpPrereqError("The master role can be changed"
3643                                    " only via masterfailover",
3644                                    errors.ECODE_INVAL)
3645
3646
3647     if node.master_candidate and self.might_demote and not self.lock_all:
3648       assert not self.op.auto_promote, "auto-promote set but lock_all not"
3649       # check if after removing the current node, we're missing master
3650       # candidates
3651       (mc_remaining, mc_should, _) = \
3652           self.cfg.GetMasterCandidateStats(exceptions=[node.name])
3653       if mc_remaining < mc_should:
3654         raise errors.OpPrereqError("Not enough master candidates, please"
3655                                    " pass auto_promote to allow promotion",
3656                                    errors.ECODE_INVAL)
3657
3658     if (self.op.master_candidate == True and
3659         ((node.offline and not self.op.offline == False) or
3660          (node.drained and not self.op.drained == False))):
3661       raise errors.OpPrereqError("Node '%s' is offline or drained, can't set"
3662                                  " to master_candidate" % node.name,
3663                                  errors.ECODE_INVAL)
3664
3665     # If we're being deofflined/drained, we'll MC ourself if needed
3666     if (self.deoffline_or_drain and not self.offline_or_drain and not
3667         self.op.master_candidate == True and not node.master_candidate):
3668       self.op.master_candidate = _DecideSelfPromotion(self)
3669       if self.op.master_candidate:
3670         self.LogInfo("Autopromoting node to master candidate")
3671
3672     return
3673
3674   def Exec(self, feedback_fn):
3675     """Modifies a node.
3676
3677     """
3678     node = self.node
3679
3680     result = []
3681     changed_mc = False
3682
3683     if self.op.offline is not None:
3684       node.offline = self.op.offline
3685       result.append(("offline", str(self.op.offline)))
3686       if self.op.offline == True:
3687         if node.master_candidate:
3688           node.master_candidate = False
3689           changed_mc = True
3690           result.append(("master_candidate", "auto-demotion due to offline"))
3691         if node.drained:
3692           node.drained = False
3693           result.append(("drained", "clear drained status due to offline"))
3694
3695     if self.op.master_candidate is not None:
3696       node.master_candidate = self.op.master_candidate
3697       changed_mc = True
3698       result.append(("master_candidate", str(self.op.master_candidate)))
3699       if self.op.master_candidate == False:
3700         rrc = self.rpc.call_node_demote_from_mc(node.name)
3701         msg = rrc.fail_msg
3702         if msg:
3703           self.LogWarning("Node failed to demote itself: %s" % msg)
3704
3705     if self.op.drained is not None:
3706       node.drained = self.op.drained
3707       result.append(("drained", str(self.op.drained)))
3708       if self.op.drained == True:
3709         if node.master_candidate:
3710           node.master_candidate = False
3711           changed_mc = True
3712           result.append(("master_candidate", "auto-demotion due to drain"))
3713           rrc = self.rpc.call_node_demote_from_mc(node.name)
3714           msg = rrc.fail_msg
3715           if msg:
3716             self.LogWarning("Node failed to demote itself: %s" % msg)
3717         if node.offline:
3718           node.offline = False
3719           result.append(("offline", "clear offline status due to drain"))
3720
3721     # we locked all nodes, we adjust the CP before updating this node
3722     if self.lock_all:
3723       _AdjustCandidatePool(self, [node.name])
3724
3725     # this will trigger configuration file update, if needed
3726     self.cfg.Update(node, feedback_fn)
3727
3728     # this will trigger job queue propagation or cleanup
3729     if changed_mc:
3730       self.context.ReaddNode(node)
3731
3732     return result
3733
3734
3735 class LUPowercycleNode(NoHooksLU):
3736   """Powercycles a node.
3737
3738   """
3739   _OP_REQP = ["node_name", "force"]
3740   REQ_BGL = False
3741
3742   def CheckArguments(self):
3743     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3744     if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
3745       raise errors.OpPrereqError("The node is the master and the force"
3746                                  " parameter was not set",
3747                                  errors.ECODE_INVAL)
3748
3749   def ExpandNames(self):
3750     """Locking for PowercycleNode.
3751
3752     This is a last-resort option and shouldn't block on other
3753     jobs. Therefore, we grab no locks.
3754
3755     """
3756     self.needed_locks = {}
3757
3758   def CheckPrereq(self):
3759     """Check prerequisites.
3760
3761     This LU has no prereqs.
3762
3763     """
3764     pass
3765
3766   def Exec(self, feedback_fn):
3767     """Reboots a node.
3768
3769     """
3770     result = self.rpc.call_node_powercycle(self.op.node_name,
3771                                            self.cfg.GetHypervisorType())
3772     result.Raise("Failed to schedule the reboot")
3773     return result.payload
3774
3775
3776 class LUQueryClusterInfo(NoHooksLU):
3777   """Query cluster configuration.
3778
3779   """
3780   _OP_REQP = []
3781   REQ_BGL = False
3782
3783   def ExpandNames(self):
3784     self.needed_locks = {}
3785
3786   def CheckPrereq(self):
3787     """No prerequsites needed for this LU.
3788
3789     """
3790     pass
3791
3792   def Exec(self, feedback_fn):
3793     """Return cluster config.
3794
3795     """
3796     cluster = self.cfg.GetClusterInfo()
3797     os_hvp = {}
3798
3799     # Filter just for enabled hypervisors
3800     for os_name, hv_dict in cluster.os_hvp.items():
3801       os_hvp[os_name] = {}
3802       for hv_name, hv_params in hv_dict.items():
3803         if hv_name in cluster.enabled_hypervisors:
3804           os_hvp[os_name][hv_name] = hv_params
3805
3806     result = {
3807       "software_version": constants.RELEASE_VERSION,
3808       "protocol_version": constants.PROTOCOL_VERSION,
3809       "config_version": constants.CONFIG_VERSION,
3810       "os_api_version": max(constants.OS_API_VERSIONS),
3811       "export_version": constants.EXPORT_VERSION,
3812       "architecture": (platform.architecture()[0], platform.machine()),
3813       "name": cluster.cluster_name,
3814       "master": cluster.master_node,
3815       "default_hypervisor": cluster.enabled_hypervisors[0],
3816       "enabled_hypervisors": cluster.enabled_hypervisors,
3817       "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
3818                         for hypervisor_name in cluster.enabled_hypervisors]),
3819       "os_hvp": os_hvp,
3820       "beparams": cluster.beparams,
3821       "osparams": cluster.osparams,
3822       "nicparams": cluster.nicparams,
3823       "candidate_pool_size": cluster.candidate_pool_size,
3824       "master_netdev": cluster.master_netdev,
3825       "volume_group_name": cluster.volume_group_name,
3826       "file_storage_dir": cluster.file_storage_dir,
3827       "maintain_node_health": cluster.maintain_node_health,
3828       "ctime": cluster.ctime,
3829       "mtime": cluster.mtime,
3830       "uuid": cluster.uuid,
3831       "tags": list(cluster.GetTags()),
3832       "uid_pool": cluster.uid_pool,
3833       }
3834
3835     return result
3836
3837
3838 class LUQueryConfigValues(NoHooksLU):
3839   """Return configuration values.
3840
3841   """
3842   _OP_REQP = []
3843   REQ_BGL = False
3844   _FIELDS_DYNAMIC = utils.FieldSet()
3845   _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
3846                                   "watcher_pause")
3847
3848   def ExpandNames(self):
3849     self.needed_locks = {}
3850
3851     _CheckOutputFields(static=self._FIELDS_STATIC,
3852                        dynamic=self._FIELDS_DYNAMIC,
3853                        selected=self.op.output_fields)
3854
3855   def CheckPrereq(self):
3856     """No prerequisites.
3857
3858     """
3859     pass
3860
3861   def Exec(self, feedback_fn):
3862     """Dump a representation of the cluster config to the standard output.
3863
3864     """
3865     values = []
3866     for field in self.op.output_fields:
3867       if field == "cluster_name":
3868         entry = self.cfg.GetClusterName()
3869       elif field == "master_node":
3870         entry = self.cfg.GetMasterNode()
3871       elif field == "drain_flag":
3872         entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
3873       elif field == "watcher_pause":
3874         entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
3875       else:
3876         raise errors.ParameterError(field)
3877       values.append(entry)
3878     return values
3879
3880
3881 class LUActivateInstanceDisks(NoHooksLU):
3882   """Bring up an instance's disks.
3883
3884   """
3885   _OP_REQP = ["instance_name"]
3886   REQ_BGL = False
3887
3888   def ExpandNames(self):
3889     self._ExpandAndLockInstance()
3890     self.needed_locks[locking.LEVEL_NODE] = []
3891     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3892
3893   def DeclareLocks(self, level):
3894     if level == locking.LEVEL_NODE:
3895       self._LockInstancesNodes()
3896
3897   def CheckPrereq(self):
3898     """Check prerequisites.
3899
3900     This checks that the instance is in the cluster.
3901
3902     """
3903     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
3904     assert self.instance is not None, \
3905       "Cannot retrieve locked instance %s" % self.op.instance_name
3906     _CheckNodeOnline(self, self.instance.primary_node)
3907     if not hasattr(self.op, "ignore_size"):
3908       self.op.ignore_size = False
3909
3910   def Exec(self, feedback_fn):
3911     """Activate the disks.
3912
3913     """
3914     disks_ok, disks_info = \
3915               _AssembleInstanceDisks(self, self.instance,
3916                                      ignore_size=self.op.ignore_size)
3917     if not disks_ok:
3918       raise errors.OpExecError("Cannot activate block devices")
3919
3920     return disks_info
3921
3922
3923 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
3924                            ignore_size=False):
3925   """Prepare the block devices for an instance.
3926
3927   This sets up the block devices on all nodes.
3928
3929   @type lu: L{LogicalUnit}
3930   @param lu: the logical unit on whose behalf we execute
3931   @type instance: L{objects.Instance}
3932   @param instance: the instance for whose disks we assemble
3933   @type disks: list of L{objects.Disk} or None
3934   @param disks: which disks to assemble (or all, if None)
3935   @type ignore_secondaries: boolean
3936   @param ignore_secondaries: if true, errors on secondary nodes
3937       won't result in an error return from the function
3938   @type ignore_size: boolean
3939   @param ignore_size: if true, the current known size of the disk
3940       will not be used during the disk activation, useful for cases
3941       when the size is wrong
3942   @return: False if the operation failed, otherwise a list of
3943       (host, instance_visible_name, node_visible_name)
3944       with the mapping from node devices to instance devices
3945
3946   """
3947   device_info = []
3948   disks_ok = True
3949   iname = instance.name
3950   disks = _ExpandCheckDisks(instance, disks)
3951
3952   # With the two passes mechanism we try to reduce the window of
3953   # opportunity for the race condition of switching DRBD to primary
3954   # before handshaking occured, but we do not eliminate it
3955
3956   # The proper fix would be to wait (with some limits) until the
3957   # connection has been made and drbd transitions from WFConnection
3958   # into any other network-connected state (Connected, SyncTarget,
3959   # SyncSource, etc.)
3960
3961   # 1st pass, assemble on all nodes in secondary mode
3962   for inst_disk in disks:
3963     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3964       if ignore_size:
3965         node_disk = node_disk.Copy()
3966         node_disk.UnsetSize()
3967       lu.cfg.SetDiskID(node_disk, node)
3968       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False)
3969       msg = result.fail_msg
3970       if msg:
3971         lu.proc.LogWarning("Could not prepare block device %s on node %s"
3972                            " (is_primary=False, pass=1): %s",
3973                            inst_disk.iv_name, node, msg)
3974         if not ignore_secondaries:
3975           disks_ok = False
3976
3977   # FIXME: race condition on drbd migration to primary
3978
3979   # 2nd pass, do only the primary node
3980   for inst_disk in disks:
3981     dev_path = None
3982
3983     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
3984       if node != instance.primary_node:
3985         continue
3986       if ignore_size:
3987         node_disk = node_disk.Copy()
3988         node_disk.UnsetSize()
3989       lu.cfg.SetDiskID(node_disk, node)
3990       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True)
3991       msg = result.fail_msg
3992       if msg:
3993         lu.proc.LogWarning("Could not prepare block device %s on node %s"
3994                            " (is_primary=True, pass=2): %s",
3995                            inst_disk.iv_name, node, msg)
3996         disks_ok = False
3997       else:
3998         dev_path = result.payload
3999
4000     device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
4001
4002   # leave the disks configured for the primary node
4003   # this is a workaround that would be fixed better by
4004   # improving the logical/physical id handling
4005   for disk in disks:
4006     lu.cfg.SetDiskID(disk, instance.primary_node)
4007
4008   return disks_ok, device_info
4009
4010
4011 def _StartInstanceDisks(lu, instance, force):
4012   """Start the disks of an instance.
4013
4014   """
4015   disks_ok, _ = _AssembleInstanceDisks(lu, instance,
4016                                            ignore_secondaries=force)
4017   if not disks_ok:
4018     _ShutdownInstanceDisks(lu, instance)
4019     if force is not None and not force:
4020       lu.proc.LogWarning("", hint="If the message above refers to a"
4021                          " secondary node,"
4022                          " you can retry the operation using '--force'.")
4023     raise errors.OpExecError("Disk consistency error")
4024
4025
4026 class LUDeactivateInstanceDisks(NoHooksLU):
4027   """Shutdown an instance's disks.
4028
4029   """
4030   _OP_REQP = ["instance_name"]
4031   REQ_BGL = False
4032
4033   def ExpandNames(self):
4034     self._ExpandAndLockInstance()
4035     self.needed_locks[locking.LEVEL_NODE] = []
4036     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4037
4038   def DeclareLocks(self, level):
4039     if level == locking.LEVEL_NODE:
4040       self._LockInstancesNodes()
4041
4042   def CheckPrereq(self):
4043     """Check prerequisites.
4044
4045     This checks that the instance is in the cluster.
4046
4047     """
4048     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4049     assert self.instance is not None, \
4050       "Cannot retrieve locked instance %s" % self.op.instance_name
4051
4052   def Exec(self, feedback_fn):
4053     """Deactivate the disks
4054
4055     """
4056     instance = self.instance
4057     _SafeShutdownInstanceDisks(self, instance)
4058
4059
4060 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
4061   """Shutdown block devices of an instance.
4062
4063   This function checks if an instance is running, before calling
4064   _ShutdownInstanceDisks.
4065
4066   """
4067   _CheckInstanceDown(lu, instance, "cannot shutdown disks")
4068   _ShutdownInstanceDisks(lu, instance, disks=disks)
4069
4070
4071 def _ExpandCheckDisks(instance, disks):
4072   """Return the instance disks selected by the disks list
4073
4074   @type disks: list of L{objects.Disk} or None
4075   @param disks: selected disks
4076   @rtype: list of L{objects.Disk}
4077   @return: selected instance disks to act on
4078
4079   """
4080   if disks is None:
4081     return instance.disks
4082   else:
4083     if not set(disks).issubset(instance.disks):
4084       raise errors.ProgrammerError("Can only act on disks belonging to the"
4085                                    " target instance")
4086     return disks
4087
4088
4089 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
4090   """Shutdown block devices of an instance.
4091
4092   This does the shutdown on all nodes of the instance.
4093
4094   If the ignore_primary is false, errors on the primary node are
4095   ignored.
4096
4097   """
4098   all_result = True
4099   disks = _ExpandCheckDisks(instance, disks)
4100
4101   for disk in disks:
4102     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
4103       lu.cfg.SetDiskID(top_disk, node)
4104       result = lu.rpc.call_blockdev_shutdown(node, top_disk)
4105       msg = result.fail_msg
4106       if msg:
4107         lu.LogWarning("Could not shutdown block device %s on node %s: %s",
4108                       disk.iv_name, node, msg)
4109         if not ignore_primary or node != instance.primary_node:
4110           all_result = False
4111   return all_result
4112
4113
4114 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
4115   """Checks if a node has enough free memory.
4116
4117   This function check if a given node has the needed amount of free
4118   memory. In case the node has less memory or we cannot get the
4119   information from the node, this function raise an OpPrereqError
4120   exception.
4121
4122   @type lu: C{LogicalUnit}
4123   @param lu: a logical unit from which we get configuration data
4124   @type node: C{str}
4125   @param node: the node to check
4126   @type reason: C{str}
4127   @param reason: string to use in the error message
4128   @type requested: C{int}
4129   @param requested: the amount of memory in MiB to check for
4130   @type hypervisor_name: C{str}
4131   @param hypervisor_name: the hypervisor to ask for memory stats
4132   @raise errors.OpPrereqError: if the node doesn't have enough memory, or
4133       we cannot check the node
4134
4135   """
4136   nodeinfo = lu.rpc.call_node_info([node], lu.cfg.GetVGName(), hypervisor_name)
4137   nodeinfo[node].Raise("Can't get data from node %s" % node,
4138                        prereq=True, ecode=errors.ECODE_ENVIRON)
4139   free_mem = nodeinfo[node].payload.get('memory_free', None)
4140   if not isinstance(free_mem, int):
4141     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
4142                                " was '%s'" % (node, free_mem),
4143                                errors.ECODE_ENVIRON)
4144   if requested > free_mem:
4145     raise errors.OpPrereqError("Not enough memory on node %s for %s:"
4146                                " needed %s MiB, available %s MiB" %
4147                                (node, reason, requested, free_mem),
4148                                errors.ECODE_NORES)
4149
4150
4151 def _CheckNodesFreeDisk(lu, nodenames, requested):
4152   """Checks if nodes have enough free disk space in the default VG.
4153
4154   This function check if all given nodes have the needed amount of
4155   free disk. In case any node has less disk or we cannot get the
4156   information from the node, this function raise an OpPrereqError
4157   exception.
4158
4159   @type lu: C{LogicalUnit}
4160   @param lu: a logical unit from which we get configuration data
4161   @type nodenames: C{list}
4162   @param nodenames: the list of node names to check
4163   @type requested: C{int}
4164   @param requested: the amount of disk in MiB to check for
4165   @raise errors.OpPrereqError: if the node doesn't have enough disk, or
4166       we cannot check the node
4167
4168   """
4169   nodeinfo = lu.rpc.call_node_info(nodenames, lu.cfg.GetVGName(),
4170                                    lu.cfg.GetHypervisorType())
4171   for node in nodenames:
4172     info = nodeinfo[node]
4173     info.Raise("Cannot get current information from node %s" % node,
4174                prereq=True, ecode=errors.ECODE_ENVIRON)
4175     vg_free = info.payload.get("vg_free", None)
4176     if not isinstance(vg_free, int):
4177       raise errors.OpPrereqError("Can't compute free disk space on node %s,"
4178                                  " result was '%s'" % (node, vg_free),
4179                                  errors.ECODE_ENVIRON)
4180     if requested > vg_free:
4181       raise errors.OpPrereqError("Not enough disk space on target node %s:"
4182                                  " required %d MiB, available %d MiB" %
4183                                  (node, requested, vg_free),
4184                                  errors.ECODE_NORES)
4185
4186
4187 class LUStartupInstance(LogicalUnit):
4188   """Starts an instance.
4189
4190   """
4191   HPATH = "instance-start"
4192   HTYPE = constants.HTYPE_INSTANCE
4193   _OP_REQP = ["instance_name", "force"]
4194   REQ_BGL = False
4195
4196   def ExpandNames(self):
4197     self._ExpandAndLockInstance()
4198
4199   def BuildHooksEnv(self):
4200     """Build hooks env.
4201
4202     This runs on master, primary and secondary nodes of the instance.
4203
4204     """
4205     env = {
4206       "FORCE": self.op.force,
4207       }
4208     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4209     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4210     return env, nl, nl
4211
4212   def CheckPrereq(self):
4213     """Check prerequisites.
4214
4215     This checks that the instance is in the cluster.
4216
4217     """
4218     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4219     assert self.instance is not None, \
4220       "Cannot retrieve locked instance %s" % self.op.instance_name
4221
4222     # extra beparams
4223     self.beparams = getattr(self.op, "beparams", {})
4224     if self.beparams:
4225       if not isinstance(self.beparams, dict):
4226         raise errors.OpPrereqError("Invalid beparams passed: %s, expected"
4227                                    " dict" % (type(self.beparams), ),
4228                                    errors.ECODE_INVAL)
4229       # fill the beparams dict
4230       utils.ForceDictType(self.beparams, constants.BES_PARAMETER_TYPES)
4231       self.op.beparams = self.beparams
4232
4233     # extra hvparams
4234     self.hvparams = getattr(self.op, "hvparams", {})
4235     if self.hvparams:
4236       if not isinstance(self.hvparams, dict):
4237         raise errors.OpPrereqError("Invalid hvparams passed: %s, expected"
4238                                    " dict" % (type(self.hvparams), ),
4239                                    errors.ECODE_INVAL)
4240
4241       # check hypervisor parameter syntax (locally)
4242       cluster = self.cfg.GetClusterInfo()
4243       utils.ForceDictType(self.hvparams, constants.HVS_PARAMETER_TYPES)
4244       filled_hvp = cluster.FillHV(instance)
4245       filled_hvp.update(self.hvparams)
4246       hv_type = hypervisor.GetHypervisor(instance.hypervisor)
4247       hv_type.CheckParameterSyntax(filled_hvp)
4248       _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
4249       self.op.hvparams = self.hvparams
4250
4251     _CheckNodeOnline(self, instance.primary_node)
4252
4253     bep = self.cfg.GetClusterInfo().FillBE(instance)
4254     # check bridges existence
4255     _CheckInstanceBridgesExist(self, instance)
4256
4257     remote_info = self.rpc.call_instance_info(instance.primary_node,
4258                                               instance.name,
4259                                               instance.hypervisor)
4260     remote_info.Raise("Error checking node %s" % instance.primary_node,
4261                       prereq=True, ecode=errors.ECODE_ENVIRON)
4262     if not remote_info.payload: # not running already
4263       _CheckNodeFreeMemory(self, instance.primary_node,
4264                            "starting instance %s" % instance.name,
4265                            bep[constants.BE_MEMORY], instance.hypervisor)
4266
4267   def Exec(self, feedback_fn):
4268     """Start the instance.
4269
4270     """
4271     instance = self.instance
4272     force = self.op.force
4273
4274     self.cfg.MarkInstanceUp(instance.name)
4275
4276     node_current = instance.primary_node
4277
4278     _StartInstanceDisks(self, instance, force)
4279
4280     result = self.rpc.call_instance_start(node_current, instance,
4281                                           self.hvparams, self.beparams)
4282     msg = result.fail_msg
4283     if msg:
4284       _ShutdownInstanceDisks(self, instance)
4285       raise errors.OpExecError("Could not start instance: %s" % msg)
4286
4287
4288 class LURebootInstance(LogicalUnit):
4289   """Reboot an instance.
4290
4291   """
4292   HPATH = "instance-reboot"
4293   HTYPE = constants.HTYPE_INSTANCE
4294   _OP_REQP = ["instance_name", "ignore_secondaries", "reboot_type"]
4295   REQ_BGL = False
4296
4297   def CheckArguments(self):
4298     """Check the arguments.
4299
4300     """
4301     self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4302                                     constants.DEFAULT_SHUTDOWN_TIMEOUT)
4303
4304   def ExpandNames(self):
4305     if self.op.reboot_type not in [constants.INSTANCE_REBOOT_SOFT,
4306                                    constants.INSTANCE_REBOOT_HARD,
4307                                    constants.INSTANCE_REBOOT_FULL]:
4308       raise errors.ParameterError("reboot type not in [%s, %s, %s]" %
4309                                   (constants.INSTANCE_REBOOT_SOFT,
4310                                    constants.INSTANCE_REBOOT_HARD,
4311                                    constants.INSTANCE_REBOOT_FULL))
4312     self._ExpandAndLockInstance()
4313
4314   def BuildHooksEnv(self):
4315     """Build hooks env.
4316
4317     This runs on master, primary and secondary nodes of the instance.
4318
4319     """
4320     env = {
4321       "IGNORE_SECONDARIES": self.op.ignore_secondaries,
4322       "REBOOT_TYPE": self.op.reboot_type,
4323       "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
4324       }
4325     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
4326     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4327     return env, nl, nl
4328
4329   def CheckPrereq(self):
4330     """Check prerequisites.
4331
4332     This checks that the instance is in the cluster.
4333
4334     """
4335     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4336     assert self.instance is not None, \
4337       "Cannot retrieve locked instance %s" % self.op.instance_name
4338
4339     _CheckNodeOnline(self, instance.primary_node)
4340
4341     # check bridges existence
4342     _CheckInstanceBridgesExist(self, instance)
4343
4344   def Exec(self, feedback_fn):
4345     """Reboot the instance.
4346
4347     """
4348     instance = self.instance
4349     ignore_secondaries = self.op.ignore_secondaries
4350     reboot_type = self.op.reboot_type
4351
4352     node_current = instance.primary_node
4353
4354     if reboot_type in [constants.INSTANCE_REBOOT_SOFT,
4355                        constants.INSTANCE_REBOOT_HARD]:
4356       for disk in instance.disks:
4357         self.cfg.SetDiskID(disk, node_current)
4358       result = self.rpc.call_instance_reboot(node_current, instance,
4359                                              reboot_type,
4360                                              self.shutdown_timeout)
4361       result.Raise("Could not reboot instance")
4362     else:
4363       result = self.rpc.call_instance_shutdown(node_current, instance,
4364                                                self.shutdown_timeout)
4365       result.Raise("Could not shutdown instance for full reboot")
4366       _ShutdownInstanceDisks(self, instance)
4367       _StartInstanceDisks(self, instance, ignore_secondaries)
4368       result = self.rpc.call_instance_start(node_current, instance, None, None)
4369       msg = result.fail_msg
4370       if msg:
4371         _ShutdownInstanceDisks(self, instance)
4372         raise errors.OpExecError("Could not start instance for"
4373                                  " full reboot: %s" % msg)
4374
4375     self.cfg.MarkInstanceUp(instance.name)
4376
4377
4378 class LUShutdownInstance(LogicalUnit):
4379   """Shutdown an instance.
4380
4381   """
4382   HPATH = "instance-stop"
4383   HTYPE = constants.HTYPE_INSTANCE
4384   _OP_REQP = ["instance_name"]
4385   REQ_BGL = False
4386
4387   def CheckArguments(self):
4388     """Check the arguments.
4389
4390     """
4391     self.timeout = getattr(self.op, "timeout",
4392                            constants.DEFAULT_SHUTDOWN_TIMEOUT)
4393
4394   def ExpandNames(self):
4395     self._ExpandAndLockInstance()
4396
4397   def BuildHooksEnv(self):
4398     """Build hooks env.
4399
4400     This runs on master, primary and secondary nodes of the instance.
4401
4402     """
4403     env = _BuildInstanceHookEnvByObject(self, self.instance)
4404     env["TIMEOUT"] = self.timeout
4405     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4406     return env, nl, nl
4407
4408   def CheckPrereq(self):
4409     """Check prerequisites.
4410
4411     This checks that the instance is in the cluster.
4412
4413     """
4414     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4415     assert self.instance is not None, \
4416       "Cannot retrieve locked instance %s" % self.op.instance_name
4417     _CheckNodeOnline(self, self.instance.primary_node)
4418
4419   def Exec(self, feedback_fn):
4420     """Shutdown the instance.
4421
4422     """
4423     instance = self.instance
4424     node_current = instance.primary_node
4425     timeout = self.timeout
4426     self.cfg.MarkInstanceDown(instance.name)
4427     result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
4428     msg = result.fail_msg
4429     if msg:
4430       self.proc.LogWarning("Could not shutdown instance: %s" % msg)
4431
4432     _ShutdownInstanceDisks(self, instance)
4433
4434
4435 class LUReinstallInstance(LogicalUnit):
4436   """Reinstall an instance.
4437
4438   """
4439   HPATH = "instance-reinstall"
4440   HTYPE = constants.HTYPE_INSTANCE
4441   _OP_REQP = ["instance_name"]
4442   REQ_BGL = False
4443
4444   def ExpandNames(self):
4445     self._ExpandAndLockInstance()
4446
4447   def BuildHooksEnv(self):
4448     """Build hooks env.
4449
4450     This runs on master, primary and secondary nodes of the instance.
4451
4452     """
4453     env = _BuildInstanceHookEnvByObject(self, self.instance)
4454     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4455     return env, nl, nl
4456
4457   def CheckPrereq(self):
4458     """Check prerequisites.
4459
4460     This checks that the instance is in the cluster and is not running.
4461
4462     """
4463     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4464     assert instance is not None, \
4465       "Cannot retrieve locked instance %s" % self.op.instance_name
4466     _CheckNodeOnline(self, instance.primary_node)
4467
4468     if instance.disk_template == constants.DT_DISKLESS:
4469       raise errors.OpPrereqError("Instance '%s' has no disks" %
4470                                  self.op.instance_name,
4471                                  errors.ECODE_INVAL)
4472     _CheckInstanceDown(self, instance, "cannot reinstall")
4473
4474     self.op.os_type = getattr(self.op, "os_type", None)
4475     self.op.force_variant = getattr(self.op, "force_variant", False)
4476     if self.op.os_type is not None:
4477       # OS verification
4478       pnode = _ExpandNodeName(self.cfg, instance.primary_node)
4479       _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
4480
4481     self.instance = instance
4482
4483   def Exec(self, feedback_fn):
4484     """Reinstall the instance.
4485
4486     """
4487     inst = self.instance
4488
4489     if self.op.os_type is not None:
4490       feedback_fn("Changing OS to '%s'..." % self.op.os_type)
4491       inst.os = self.op.os_type
4492       self.cfg.Update(inst, feedback_fn)
4493
4494     _StartInstanceDisks(self, inst, None)
4495     try:
4496       feedback_fn("Running the instance OS create scripts...")
4497       # FIXME: pass debug option from opcode to backend
4498       result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
4499                                              self.op.debug_level)
4500       result.Raise("Could not install OS for instance %s on node %s" %
4501                    (inst.name, inst.primary_node))
4502     finally:
4503       _ShutdownInstanceDisks(self, inst)
4504
4505
4506 class LURecreateInstanceDisks(LogicalUnit):
4507   """Recreate an instance's missing disks.
4508
4509   """
4510   HPATH = "instance-recreate-disks"
4511   HTYPE = constants.HTYPE_INSTANCE
4512   _OP_REQP = ["instance_name", "disks"]
4513   REQ_BGL = False
4514
4515   def CheckArguments(self):
4516     """Check the arguments.
4517
4518     """
4519     if not isinstance(self.op.disks, list):
4520       raise errors.OpPrereqError("Invalid disks parameter", errors.ECODE_INVAL)
4521     for item in self.op.disks:
4522       if (not isinstance(item, int) or
4523           item < 0):
4524         raise errors.OpPrereqError("Invalid disk specification '%s'" %
4525                                    str(item), errors.ECODE_INVAL)
4526
4527   def ExpandNames(self):
4528     self._ExpandAndLockInstance()
4529
4530   def BuildHooksEnv(self):
4531     """Build hooks env.
4532
4533     This runs on master, primary and secondary nodes of the instance.
4534
4535     """
4536     env = _BuildInstanceHookEnvByObject(self, self.instance)
4537     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4538     return env, nl, nl
4539
4540   def CheckPrereq(self):
4541     """Check prerequisites.
4542
4543     This checks that the instance is in the cluster and is not running.
4544
4545     """
4546     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4547     assert instance is not None, \
4548       "Cannot retrieve locked instance %s" % self.op.instance_name
4549     _CheckNodeOnline(self, instance.primary_node)
4550
4551     if instance.disk_template == constants.DT_DISKLESS:
4552       raise errors.OpPrereqError("Instance '%s' has no disks" %
4553                                  self.op.instance_name, errors.ECODE_INVAL)
4554     _CheckInstanceDown(self, instance, "cannot recreate disks")
4555
4556     if not self.op.disks:
4557       self.op.disks = range(len(instance.disks))
4558     else:
4559       for idx in self.op.disks:
4560         if idx >= len(instance.disks):
4561           raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
4562                                      errors.ECODE_INVAL)
4563
4564     self.instance = instance
4565
4566   def Exec(self, feedback_fn):
4567     """Recreate the disks.
4568
4569     """
4570     to_skip = []
4571     for idx, _ in enumerate(self.instance.disks):
4572       if idx not in self.op.disks: # disk idx has not been passed in
4573         to_skip.append(idx)
4574         continue
4575
4576     _CreateDisks(self, self.instance, to_skip=to_skip)
4577
4578
4579 class LURenameInstance(LogicalUnit):
4580   """Rename an instance.
4581
4582   """
4583   HPATH = "instance-rename"
4584   HTYPE = constants.HTYPE_INSTANCE
4585   _OP_REQP = ["instance_name", "new_name"]
4586
4587   def BuildHooksEnv(self):
4588     """Build hooks env.
4589
4590     This runs on master, primary and secondary nodes of the instance.
4591
4592     """
4593     env = _BuildInstanceHookEnvByObject(self, self.instance)
4594     env["INSTANCE_NEW_NAME"] = self.op.new_name
4595     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
4596     return env, nl, nl
4597
4598   def CheckPrereq(self):
4599     """Check prerequisites.
4600
4601     This checks that the instance is in the cluster and is not running.
4602
4603     """
4604     self.op.instance_name = _ExpandInstanceName(self.cfg,
4605                                                 self.op.instance_name)
4606     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4607     assert instance is not None
4608     _CheckNodeOnline(self, instance.primary_node)
4609     _CheckInstanceDown(self, instance, "cannot rename")
4610     self.instance = instance
4611
4612     # new name verification
4613     name_info = utils.GetHostInfo(self.op.new_name)
4614
4615     self.op.new_name = new_name = name_info.name
4616     instance_list = self.cfg.GetInstanceList()
4617     if new_name in instance_list:
4618       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
4619                                  new_name, errors.ECODE_EXISTS)
4620
4621     if not getattr(self.op, "ignore_ip", False):
4622       if utils.TcpPing(name_info.ip, constants.DEFAULT_NODED_PORT):
4623         raise errors.OpPrereqError("IP %s of instance %s already in use" %
4624                                    (name_info.ip, new_name),
4625                                    errors.ECODE_NOTUNIQUE)
4626
4627
4628   def Exec(self, feedback_fn):
4629     """Reinstall the instance.
4630
4631     """
4632     inst = self.instance
4633     old_name = inst.name
4634
4635     if inst.disk_template == constants.DT_FILE:
4636       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4637
4638     self.cfg.RenameInstance(inst.name, self.op.new_name)
4639     # Change the instance lock. This is definitely safe while we hold the BGL
4640     self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
4641     self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
4642
4643     # re-read the instance from the configuration after rename
4644     inst = self.cfg.GetInstanceInfo(self.op.new_name)
4645
4646     if inst.disk_template == constants.DT_FILE:
4647       new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
4648       result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
4649                                                      old_file_storage_dir,
4650                                                      new_file_storage_dir)
4651       result.Raise("Could not rename on node %s directory '%s' to '%s'"
4652                    " (but the instance has been renamed in Ganeti)" %
4653                    (inst.primary_node, old_file_storage_dir,
4654                     new_file_storage_dir))
4655
4656     _StartInstanceDisks(self, inst, None)
4657     try:
4658       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
4659                                                  old_name, self.op.debug_level)
4660       msg = result.fail_msg
4661       if msg:
4662         msg = ("Could not run OS rename script for instance %s on node %s"
4663                " (but the instance has been renamed in Ganeti): %s" %
4664                (inst.name, inst.primary_node, msg))
4665         self.proc.LogWarning(msg)
4666     finally:
4667       _ShutdownInstanceDisks(self, inst)
4668
4669
4670 class LURemoveInstance(LogicalUnit):
4671   """Remove an instance.
4672
4673   """
4674   HPATH = "instance-remove"
4675   HTYPE = constants.HTYPE_INSTANCE
4676   _OP_REQP = ["instance_name", "ignore_failures"]
4677   REQ_BGL = False
4678
4679   def CheckArguments(self):
4680     """Check the arguments.
4681
4682     """
4683     self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
4684                                     constants.DEFAULT_SHUTDOWN_TIMEOUT)
4685
4686   def ExpandNames(self):
4687     self._ExpandAndLockInstance()
4688     self.needed_locks[locking.LEVEL_NODE] = []
4689     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4690
4691   def DeclareLocks(self, level):
4692     if level == locking.LEVEL_NODE:
4693       self._LockInstancesNodes()
4694
4695   def BuildHooksEnv(self):
4696     """Build hooks env.
4697
4698     This runs on master, primary and secondary nodes of the instance.
4699
4700     """
4701     env = _BuildInstanceHookEnvByObject(self, self.instance)
4702     env["SHUTDOWN_TIMEOUT"] = self.shutdown_timeout
4703     nl = [self.cfg.GetMasterNode()]
4704     nl_post = list(self.instance.all_nodes) + nl
4705     return env, nl, nl_post
4706
4707   def CheckPrereq(self):
4708     """Check prerequisites.
4709
4710     This checks that the instance is in the cluster.
4711
4712     """
4713     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
4714     assert self.instance is not None, \
4715       "Cannot retrieve locked instance %s" % self.op.instance_name
4716
4717   def Exec(self, feedback_fn):
4718     """Remove the instance.
4719
4720     """
4721     instance = self.instance
4722     logging.info("Shutting down instance %s on node %s",
4723                  instance.name, instance.primary_node)
4724
4725     result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
4726                                              self.shutdown_timeout)
4727     msg = result.fail_msg
4728     if msg:
4729       if self.op.ignore_failures:
4730         feedback_fn("Warning: can't shutdown instance: %s" % msg)
4731       else:
4732         raise errors.OpExecError("Could not shutdown instance %s on"
4733                                  " node %s: %s" %
4734                                  (instance.name, instance.primary_node, msg))
4735
4736     _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
4737
4738
4739 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
4740   """Utility function to remove an instance.
4741
4742   """
4743   logging.info("Removing block devices for instance %s", instance.name)
4744
4745   if not _RemoveDisks(lu, instance):
4746     if not ignore_failures:
4747       raise errors.OpExecError("Can't remove instance's disks")
4748     feedback_fn("Warning: can't remove instance's disks")
4749
4750   logging.info("Removing instance %s out of cluster config", instance.name)
4751
4752   lu.cfg.RemoveInstance(instance.name)
4753
4754   assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
4755     "Instance lock removal conflict"
4756
4757   # Remove lock for the instance
4758   lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
4759
4760
4761 class LUQueryInstances(NoHooksLU):
4762   """Logical unit for querying instances.
4763
4764   """
4765   # pylint: disable-msg=W0142
4766   _OP_REQP = ["output_fields", "names", "use_locking"]
4767   REQ_BGL = False
4768   _SIMPLE_FIELDS = ["name", "os", "network_port", "hypervisor",
4769                     "serial_no", "ctime", "mtime", "uuid"]
4770   _FIELDS_STATIC = utils.FieldSet(*["name", "os", "pnode", "snodes",
4771                                     "admin_state",
4772                                     "disk_template", "ip", "mac", "bridge",
4773                                     "nic_mode", "nic_link",
4774                                     "sda_size", "sdb_size", "vcpus", "tags",
4775                                     "network_port", "beparams",
4776                                     r"(disk)\.(size)/([0-9]+)",
4777                                     r"(disk)\.(sizes)", "disk_usage",
4778                                     r"(nic)\.(mac|ip|mode|link)/([0-9]+)",
4779                                     r"(nic)\.(bridge)/([0-9]+)",
4780                                     r"(nic)\.(macs|ips|modes|links|bridges)",
4781                                     r"(disk|nic)\.(count)",
4782                                     "hvparams",
4783                                     ] + _SIMPLE_FIELDS +
4784                                   ["hv/%s" % name
4785                                    for name in constants.HVS_PARAMETERS
4786                                    if name not in constants.HVC_GLOBALS] +
4787                                   ["be/%s" % name
4788                                    for name in constants.BES_PARAMETERS])
4789   _FIELDS_DYNAMIC = utils.FieldSet("oper_state", "oper_ram", "status")
4790
4791
4792   def ExpandNames(self):
4793     _CheckOutputFields(static=self._FIELDS_STATIC,
4794                        dynamic=self._FIELDS_DYNAMIC,
4795                        selected=self.op.output_fields)
4796
4797     self.needed_locks = {}
4798     self.share_locks[locking.LEVEL_INSTANCE] = 1
4799     self.share_locks[locking.LEVEL_NODE] = 1
4800
4801     if self.op.names:
4802       self.wanted = _GetWantedInstances(self, self.op.names)
4803     else:
4804       self.wanted = locking.ALL_SET
4805
4806     self.do_node_query = self._FIELDS_STATIC.NonMatching(self.op.output_fields)
4807     self.do_locking = self.do_node_query and self.op.use_locking
4808     if self.do_locking:
4809       self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4810       self.needed_locks[locking.LEVEL_NODE] = []
4811       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4812
4813   def DeclareLocks(self, level):
4814     if level == locking.LEVEL_NODE and self.do_locking:
4815       self._LockInstancesNodes()
4816
4817   def CheckPrereq(self):
4818     """Check prerequisites.
4819
4820     """
4821     pass
4822
4823   def Exec(self, feedback_fn):
4824     """Computes the list of nodes and their attributes.
4825
4826     """
4827     # pylint: disable-msg=R0912
4828     # way too many branches here
4829     all_info = self.cfg.GetAllInstancesInfo()
4830     if self.wanted == locking.ALL_SET:
4831       # caller didn't specify instance names, so ordering is not important
4832       if self.do_locking:
4833         instance_names = self.acquired_locks[locking.LEVEL_INSTANCE]
4834       else:
4835         instance_names = all_info.keys()
4836       instance_names = utils.NiceSort(instance_names)
4837     else:
4838       # caller did specify names, so we must keep the ordering
4839       if self.do_locking:
4840         tgt_set = self.acquired_locks[locking.LEVEL_INSTANCE]
4841       else:
4842         tgt_set = all_info.keys()
4843       missing = set(self.wanted).difference(tgt_set)
4844       if missing:
4845         raise errors.OpExecError("Some instances were removed before"
4846                                  " retrieving their data: %s" % missing)
4847       instance_names = self.wanted
4848
4849     instance_list = [all_info[iname] for iname in instance_names]
4850
4851     # begin data gathering
4852
4853     nodes = frozenset([inst.primary_node for inst in instance_list])
4854     hv_list = list(set([inst.hypervisor for inst in instance_list]))
4855
4856     bad_nodes = []
4857     off_nodes = []
4858     if self.do_node_query:
4859       live_data = {}
4860       node_data = self.rpc.call_all_instances_info(nodes, hv_list)
4861       for name in nodes:
4862         result = node_data[name]
4863         if result.offline:
4864           # offline nodes will be in both lists
4865           off_nodes.append(name)
4866         if result.fail_msg:
4867           bad_nodes.append(name)
4868         else:
4869           if result.payload:
4870             live_data.update(result.payload)
4871           # else no instance is alive
4872     else:
4873       live_data = dict([(name, {}) for name in instance_names])
4874
4875     # end data gathering
4876
4877     HVPREFIX = "hv/"
4878     BEPREFIX = "be/"
4879     output = []
4880     cluster = self.cfg.GetClusterInfo()
4881     for instance in instance_list:
4882       iout = []
4883       i_hv = cluster.FillHV(instance, skip_globals=True)
4884       i_be = cluster.FillBE(instance)
4885       i_nicp = [cluster.SimpleFillNIC(nic.nicparams) for nic in instance.nics]
4886       for field in self.op.output_fields:
4887         st_match = self._FIELDS_STATIC.Matches(field)
4888         if field in self._SIMPLE_FIELDS:
4889           val = getattr(instance, field)
4890         elif field == "pnode":
4891           val = instance.primary_node
4892         elif field == "snodes":
4893           val = list(instance.secondary_nodes)
4894         elif field == "admin_state":
4895           val = instance.admin_up
4896         elif field == "oper_state":
4897           if instance.primary_node in bad_nodes:
4898             val = None
4899           else:
4900             val = bool(live_data.get(instance.name))
4901         elif field == "status":
4902           if instance.primary_node in off_nodes:
4903             val = "ERROR_nodeoffline"
4904           elif instance.primary_node in bad_nodes:
4905             val = "ERROR_nodedown"
4906           else:
4907             running = bool(live_data.get(instance.name))
4908             if running:
4909               if instance.admin_up:
4910                 val = "running"
4911               else:
4912                 val = "ERROR_up"
4913             else:
4914               if instance.admin_up:
4915                 val = "ERROR_down"
4916               else:
4917                 val = "ADMIN_down"
4918         elif field == "oper_ram":
4919           if instance.primary_node in bad_nodes:
4920             val = None
4921           elif instance.name in live_data:
4922             val = live_data[instance.name].get("memory", "?")
4923           else:
4924             val = "-"
4925         elif field == "vcpus":
4926           val = i_be[constants.BE_VCPUS]
4927         elif field == "disk_template":
4928           val = instance.disk_template
4929         elif field == "ip":
4930           if instance.nics:
4931             val = instance.nics[0].ip
4932           else:
4933             val = None
4934         elif field == "nic_mode":
4935           if instance.nics:
4936             val = i_nicp[0][constants.NIC_MODE]
4937           else:
4938             val = None
4939         elif field == "nic_link":
4940           if instance.nics:
4941             val = i_nicp[0][constants.NIC_LINK]
4942           else:
4943             val = None
4944         elif field == "bridge":
4945           if (instance.nics and
4946               i_nicp[0][constants.NIC_MODE] == constants.NIC_MODE_BRIDGED):
4947             val = i_nicp[0][constants.NIC_LINK]
4948           else:
4949             val = None
4950         elif field == "mac":
4951           if instance.nics:
4952             val = instance.nics[0].mac
4953           else:
4954             val = None
4955         elif field == "sda_size" or field == "sdb_size":
4956           idx = ord(field[2]) - ord('a')
4957           try:
4958             val = instance.FindDisk(idx).size
4959           except errors.OpPrereqError:
4960             val = None
4961         elif field == "disk_usage": # total disk usage per node
4962           disk_sizes = [{'size': disk.size} for disk in instance.disks]
4963           val = _ComputeDiskSize(instance.disk_template, disk_sizes)
4964         elif field == "tags":
4965           val = list(instance.GetTags())
4966         elif field == "hvparams":
4967           val = i_hv
4968         elif (field.startswith(HVPREFIX) and
4969               field[len(HVPREFIX):] in constants.HVS_PARAMETERS and
4970               field[len(HVPREFIX):] not in constants.HVC_GLOBALS):
4971           val = i_hv.get(field[len(HVPREFIX):], None)
4972         elif field == "beparams":
4973           val = i_be
4974         elif (field.startswith(BEPREFIX) and
4975               field[len(BEPREFIX):] in constants.BES_PARAMETERS):
4976           val = i_be.get(field[len(BEPREFIX):], None)
4977         elif st_match and st_match.groups():
4978           # matches a variable list
4979           st_groups = st_match.groups()
4980           if st_groups and st_groups[0] == "disk":
4981             if st_groups[1] == "count":
4982               val = len(instance.disks)
4983             elif st_groups[1] == "sizes":
4984               val = [disk.size for disk in instance.disks]
4985             elif st_groups[1] == "size":
4986               try:
4987                 val = instance.FindDisk(st_groups[2]).size
4988               except errors.OpPrereqError:
4989                 val = None
4990             else:
4991               assert False, "Unhandled disk parameter"
4992           elif st_groups[0] == "nic":
4993             if st_groups[1] == "count":
4994               val = len(instance.nics)
4995             elif st_groups[1] == "macs":
4996               val = [nic.mac for nic in instance.nics]
4997             elif st_groups[1] == "ips":
4998               val = [nic.ip for nic in instance.nics]
4999             elif st_groups[1] == "modes":
5000               val = [nicp[constants.NIC_MODE] for nicp in i_nicp]
5001             elif st_groups[1] == "links":
5002               val = [nicp[constants.NIC_LINK] for nicp in i_nicp]
5003             elif st_groups[1] == "bridges":
5004               val = []
5005               for nicp in i_nicp:
5006                 if nicp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
5007                   val.append(nicp[constants.NIC_LINK])
5008                 else:
5009                   val.append(None)
5010             else:
5011               # index-based item
5012               nic_idx = int(st_groups[2])
5013               if nic_idx >= len(instance.nics):
5014                 val = None
5015               else:
5016                 if st_groups[1] == "mac":
5017                   val = instance.nics[nic_idx].mac
5018                 elif st_groups[1] == "ip":
5019                   val = instance.nics[nic_idx].ip
5020                 elif st_groups[1] == "mode":
5021                   val = i_nicp[nic_idx][constants.NIC_MODE]
5022                 elif st_groups[1] == "link":
5023                   val = i_nicp[nic_idx][constants.NIC_LINK]
5024                 elif st_groups[1] == "bridge":
5025                   nic_mode = i_nicp[nic_idx][constants.NIC_MODE]
5026                   if nic_mode == constants.NIC_MODE_BRIDGED:
5027                     val = i_nicp[nic_idx][constants.NIC_LINK]
5028                   else:
5029                     val = None
5030                 else:
5031                   assert False, "Unhandled NIC parameter"
5032           else:
5033             assert False, ("Declared but unhandled variable parameter '%s'" %
5034                            field)
5035         else:
5036           assert False, "Declared but unhandled parameter '%s'" % field
5037         iout.append(val)
5038       output.append(iout)
5039
5040     return output
5041
5042
5043 class LUFailoverInstance(LogicalUnit):
5044   """Failover an instance.
5045
5046   """
5047   HPATH = "instance-failover"
5048   HTYPE = constants.HTYPE_INSTANCE
5049   _OP_REQP = ["instance_name", "ignore_consistency"]
5050   REQ_BGL = False
5051
5052   def CheckArguments(self):
5053     """Check the arguments.
5054
5055     """
5056     self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
5057                                     constants.DEFAULT_SHUTDOWN_TIMEOUT)
5058
5059   def ExpandNames(self):
5060     self._ExpandAndLockInstance()
5061     self.needed_locks[locking.LEVEL_NODE] = []
5062     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5063
5064   def DeclareLocks(self, level):
5065     if level == locking.LEVEL_NODE:
5066       self._LockInstancesNodes()
5067
5068   def BuildHooksEnv(self):
5069     """Build hooks env.
5070
5071     This runs on master, primary and secondary nodes of the instance.
5072
5073     """
5074     instance = self.instance
5075     source_node = instance.primary_node
5076     target_node = instance.secondary_nodes[0]
5077     env = {
5078       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
5079       "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
5080       "OLD_PRIMARY": source_node,
5081       "OLD_SECONDARY": target_node,
5082       "NEW_PRIMARY": target_node,
5083       "NEW_SECONDARY": source_node,
5084       }
5085     env.update(_BuildInstanceHookEnvByObject(self, instance))
5086     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5087     nl_post = list(nl)
5088     nl_post.append(source_node)
5089     return env, nl, nl_post
5090
5091   def CheckPrereq(self):
5092     """Check prerequisites.
5093
5094     This checks that the instance is in the cluster.
5095
5096     """
5097     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5098     assert self.instance is not None, \
5099       "Cannot retrieve locked instance %s" % self.op.instance_name
5100
5101     bep = self.cfg.GetClusterInfo().FillBE(instance)
5102     if instance.disk_template not in constants.DTS_NET_MIRROR:
5103       raise errors.OpPrereqError("Instance's disk layout is not"
5104                                  " network mirrored, cannot failover.",
5105                                  errors.ECODE_STATE)
5106
5107     secondary_nodes = instance.secondary_nodes
5108     if not secondary_nodes:
5109       raise errors.ProgrammerError("no secondary node but using "
5110                                    "a mirrored disk template")
5111
5112     target_node = secondary_nodes[0]
5113     _CheckNodeOnline(self, target_node)
5114     _CheckNodeNotDrained(self, target_node)
5115     if instance.admin_up:
5116       # check memory requirements on the secondary node
5117       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5118                            instance.name, bep[constants.BE_MEMORY],
5119                            instance.hypervisor)
5120     else:
5121       self.LogInfo("Not checking memory on the secondary node as"
5122                    " instance will not be started")
5123
5124     # check bridge existance
5125     _CheckInstanceBridgesExist(self, instance, node=target_node)
5126
5127   def Exec(self, feedback_fn):
5128     """Failover an instance.
5129
5130     The failover is done by shutting it down on its present node and
5131     starting it on the secondary.
5132
5133     """
5134     instance = self.instance
5135
5136     source_node = instance.primary_node
5137     target_node = instance.secondary_nodes[0]
5138
5139     if instance.admin_up:
5140       feedback_fn("* checking disk consistency between source and target")
5141       for dev in instance.disks:
5142         # for drbd, these are drbd over lvm
5143         if not _CheckDiskConsistency(self, dev, target_node, False):
5144           if not self.op.ignore_consistency:
5145             raise errors.OpExecError("Disk %s is degraded on target node,"
5146                                      " aborting failover." % dev.iv_name)
5147     else:
5148       feedback_fn("* not checking disk consistency as instance is not running")
5149
5150     feedback_fn("* shutting down instance on source node")
5151     logging.info("Shutting down instance %s on node %s",
5152                  instance.name, source_node)
5153
5154     result = self.rpc.call_instance_shutdown(source_node, instance,
5155                                              self.shutdown_timeout)
5156     msg = result.fail_msg
5157     if msg:
5158       if self.op.ignore_consistency:
5159         self.proc.LogWarning("Could not shutdown instance %s on node %s."
5160                              " Proceeding anyway. Please make sure node"
5161                              " %s is down. Error details: %s",
5162                              instance.name, source_node, source_node, msg)
5163       else:
5164         raise errors.OpExecError("Could not shutdown instance %s on"
5165                                  " node %s: %s" %
5166                                  (instance.name, source_node, msg))
5167
5168     feedback_fn("* deactivating the instance's disks on source node")
5169     if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
5170       raise errors.OpExecError("Can't shut down the instance's disks.")
5171
5172     instance.primary_node = target_node
5173     # distribute new instance config to the other nodes
5174     self.cfg.Update(instance, feedback_fn)
5175
5176     # Only start the instance if it's marked as up
5177     if instance.admin_up:
5178       feedback_fn("* activating the instance's disks on target node")
5179       logging.info("Starting instance %s on node %s",
5180                    instance.name, target_node)
5181
5182       disks_ok, _ = _AssembleInstanceDisks(self, instance,
5183                                                ignore_secondaries=True)
5184       if not disks_ok:
5185         _ShutdownInstanceDisks(self, instance)
5186         raise errors.OpExecError("Can't activate the instance's disks")
5187
5188       feedback_fn("* starting the instance on the target node")
5189       result = self.rpc.call_instance_start(target_node, instance, None, None)
5190       msg = result.fail_msg
5191       if msg:
5192         _ShutdownInstanceDisks(self, instance)
5193         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5194                                  (instance.name, target_node, msg))
5195
5196
5197 class LUMigrateInstance(LogicalUnit):
5198   """Migrate an instance.
5199
5200   This is migration without shutting down, compared to the failover,
5201   which is done with shutdown.
5202
5203   """
5204   HPATH = "instance-migrate"
5205   HTYPE = constants.HTYPE_INSTANCE
5206   _OP_REQP = ["instance_name", "live", "cleanup"]
5207
5208   REQ_BGL = False
5209
5210   def ExpandNames(self):
5211     self._ExpandAndLockInstance()
5212
5213     self.needed_locks[locking.LEVEL_NODE] = []
5214     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5215
5216     self._migrater = TLMigrateInstance(self, self.op.instance_name,
5217                                        self.op.live, self.op.cleanup)
5218     self.tasklets = [self._migrater]
5219
5220   def DeclareLocks(self, level):
5221     if level == locking.LEVEL_NODE:
5222       self._LockInstancesNodes()
5223
5224   def BuildHooksEnv(self):
5225     """Build hooks env.
5226
5227     This runs on master, primary and secondary nodes of the instance.
5228
5229     """
5230     instance = self._migrater.instance
5231     source_node = instance.primary_node
5232     target_node = instance.secondary_nodes[0]
5233     env = _BuildInstanceHookEnvByObject(self, instance)
5234     env["MIGRATE_LIVE"] = self.op.live
5235     env["MIGRATE_CLEANUP"] = self.op.cleanup
5236     env.update({
5237         "OLD_PRIMARY": source_node,
5238         "OLD_SECONDARY": target_node,
5239         "NEW_PRIMARY": target_node,
5240         "NEW_SECONDARY": source_node,
5241         })
5242     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
5243     nl_post = list(nl)
5244     nl_post.append(source_node)
5245     return env, nl, nl_post
5246
5247
5248 class LUMoveInstance(LogicalUnit):
5249   """Move an instance by data-copying.
5250
5251   """
5252   HPATH = "instance-move"
5253   HTYPE = constants.HTYPE_INSTANCE
5254   _OP_REQP = ["instance_name", "target_node"]
5255   REQ_BGL = False
5256
5257   def CheckArguments(self):
5258     """Check the arguments.
5259
5260     """
5261     self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
5262                                     constants.DEFAULT_SHUTDOWN_TIMEOUT)
5263
5264   def ExpandNames(self):
5265     self._ExpandAndLockInstance()
5266     target_node = _ExpandNodeName(self.cfg, self.op.target_node)
5267     self.op.target_node = target_node
5268     self.needed_locks[locking.LEVEL_NODE] = [target_node]
5269     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5270
5271   def DeclareLocks(self, level):
5272     if level == locking.LEVEL_NODE:
5273       self._LockInstancesNodes(primary_only=True)
5274
5275   def BuildHooksEnv(self):
5276     """Build hooks env.
5277
5278     This runs on master, primary and secondary nodes of the instance.
5279
5280     """
5281     env = {
5282       "TARGET_NODE": self.op.target_node,
5283       "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
5284       }
5285     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5286     nl = [self.cfg.GetMasterNode()] + [self.instance.primary_node,
5287                                        self.op.target_node]
5288     return env, nl, nl
5289
5290   def CheckPrereq(self):
5291     """Check prerequisites.
5292
5293     This checks that the instance is in the cluster.
5294
5295     """
5296     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5297     assert self.instance is not None, \
5298       "Cannot retrieve locked instance %s" % self.op.instance_name
5299
5300     node = self.cfg.GetNodeInfo(self.op.target_node)
5301     assert node is not None, \
5302       "Cannot retrieve locked node %s" % self.op.target_node
5303
5304     self.target_node = target_node = node.name
5305
5306     if target_node == instance.primary_node:
5307       raise errors.OpPrereqError("Instance %s is already on the node %s" %
5308                                  (instance.name, target_node),
5309                                  errors.ECODE_STATE)
5310
5311     bep = self.cfg.GetClusterInfo().FillBE(instance)
5312
5313     for idx, dsk in enumerate(instance.disks):
5314       if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
5315         raise errors.OpPrereqError("Instance disk %d has a complex layout,"
5316                                    " cannot copy" % idx, errors.ECODE_STATE)
5317
5318     _CheckNodeOnline(self, target_node)
5319     _CheckNodeNotDrained(self, target_node)
5320
5321     if instance.admin_up:
5322       # check memory requirements on the secondary node
5323       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
5324                            instance.name, bep[constants.BE_MEMORY],
5325                            instance.hypervisor)
5326     else:
5327       self.LogInfo("Not checking memory on the secondary node as"
5328                    " instance will not be started")
5329
5330     # check bridge existance
5331     _CheckInstanceBridgesExist(self, instance, node=target_node)
5332
5333   def Exec(self, feedback_fn):
5334     """Move an instance.
5335
5336     The move is done by shutting it down on its present node, copying
5337     the data over (slow) and starting it on the new node.
5338
5339     """
5340     instance = self.instance
5341
5342     source_node = instance.primary_node
5343     target_node = self.target_node
5344
5345     self.LogInfo("Shutting down instance %s on source node %s",
5346                  instance.name, source_node)
5347
5348     result = self.rpc.call_instance_shutdown(source_node, instance,
5349                                              self.shutdown_timeout)
5350     msg = result.fail_msg
5351     if msg:
5352       if self.op.ignore_consistency:
5353         self.proc.LogWarning("Could not shutdown instance %s on node %s."
5354                              " Proceeding anyway. Please make sure node"
5355                              " %s is down. Error details: %s",
5356                              instance.name, source_node, source_node, msg)
5357       else:
5358         raise errors.OpExecError("Could not shutdown instance %s on"
5359                                  " node %s: %s" %
5360                                  (instance.name, source_node, msg))
5361
5362     # create the target disks
5363     try:
5364       _CreateDisks(self, instance, target_node=target_node)
5365     except errors.OpExecError:
5366       self.LogWarning("Device creation failed, reverting...")
5367       try:
5368         _RemoveDisks(self, instance, target_node=target_node)
5369       finally:
5370         self.cfg.ReleaseDRBDMinors(instance.name)
5371         raise
5372
5373     cluster_name = self.cfg.GetClusterInfo().cluster_name
5374
5375     errs = []
5376     # activate, get path, copy the data over
5377     for idx, disk in enumerate(instance.disks):
5378       self.LogInfo("Copying data for disk %d", idx)
5379       result = self.rpc.call_blockdev_assemble(target_node, disk,
5380                                                instance.name, True)
5381       if result.fail_msg:
5382         self.LogWarning("Can't assemble newly created disk %d: %s",
5383                         idx, result.fail_msg)
5384         errs.append(result.fail_msg)
5385         break
5386       dev_path = result.payload
5387       result = self.rpc.call_blockdev_export(source_node, disk,
5388                                              target_node, dev_path,
5389                                              cluster_name)
5390       if result.fail_msg:
5391         self.LogWarning("Can't copy data over for disk %d: %s",
5392                         idx, result.fail_msg)
5393         errs.append(result.fail_msg)
5394         break
5395
5396     if errs:
5397       self.LogWarning("Some disks failed to copy, aborting")
5398       try:
5399         _RemoveDisks(self, instance, target_node=target_node)
5400       finally:
5401         self.cfg.ReleaseDRBDMinors(instance.name)
5402         raise errors.OpExecError("Errors during disk copy: %s" %
5403                                  (",".join(errs),))
5404
5405     instance.primary_node = target_node
5406     self.cfg.Update(instance, feedback_fn)
5407
5408     self.LogInfo("Removing the disks on the original node")
5409     _RemoveDisks(self, instance, target_node=source_node)
5410
5411     # Only start the instance if it's marked as up
5412     if instance.admin_up:
5413       self.LogInfo("Starting instance %s on node %s",
5414                    instance.name, target_node)
5415
5416       disks_ok, _ = _AssembleInstanceDisks(self, instance,
5417                                            ignore_secondaries=True)
5418       if not disks_ok:
5419         _ShutdownInstanceDisks(self, instance)
5420         raise errors.OpExecError("Can't activate the instance's disks")
5421
5422       result = self.rpc.call_instance_start(target_node, instance, None, None)
5423       msg = result.fail_msg
5424       if msg:
5425         _ShutdownInstanceDisks(self, instance)
5426         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
5427                                  (instance.name, target_node, msg))
5428
5429
5430 class LUMigrateNode(LogicalUnit):
5431   """Migrate all instances from a node.
5432
5433   """
5434   HPATH = "node-migrate"
5435   HTYPE = constants.HTYPE_NODE
5436   _OP_REQP = ["node_name", "live"]
5437   REQ_BGL = False
5438
5439   def ExpandNames(self):
5440     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5441
5442     self.needed_locks = {
5443       locking.LEVEL_NODE: [self.op.node_name],
5444       }
5445
5446     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
5447
5448     # Create tasklets for migrating instances for all instances on this node
5449     names = []
5450     tasklets = []
5451
5452     for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
5453       logging.debug("Migrating instance %s", inst.name)
5454       names.append(inst.name)
5455
5456       tasklets.append(TLMigrateInstance(self, inst.name, self.op.live, False))
5457
5458     self.tasklets = tasklets
5459
5460     # Declare instance locks
5461     self.needed_locks[locking.LEVEL_INSTANCE] = names
5462
5463   def DeclareLocks(self, level):
5464     if level == locking.LEVEL_NODE:
5465       self._LockInstancesNodes()
5466
5467   def BuildHooksEnv(self):
5468     """Build hooks env.
5469
5470     This runs on the master, the primary and all the secondaries.
5471
5472     """
5473     env = {
5474       "NODE_NAME": self.op.node_name,
5475       }
5476
5477     nl = [self.cfg.GetMasterNode()]
5478
5479     return (env, nl, nl)
5480
5481
5482 class TLMigrateInstance(Tasklet):
5483   def __init__(self, lu, instance_name, live, cleanup):
5484     """Initializes this class.
5485
5486     """
5487     Tasklet.__init__(self, lu)
5488
5489     # Parameters
5490     self.instance_name = instance_name
5491     self.live = live
5492     self.cleanup = cleanup
5493
5494   def CheckPrereq(self):
5495     """Check prerequisites.
5496
5497     This checks that the instance is in the cluster.
5498
5499     """
5500     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
5501     instance = self.cfg.GetInstanceInfo(instance_name)
5502     assert instance is not None
5503
5504     if instance.disk_template != constants.DT_DRBD8:
5505       raise errors.OpPrereqError("Instance's disk layout is not"
5506                                  " drbd8, cannot migrate.", errors.ECODE_STATE)
5507
5508     secondary_nodes = instance.secondary_nodes
5509     if not secondary_nodes:
5510       raise errors.ConfigurationError("No secondary node but using"
5511                                       " drbd8 disk template")
5512
5513     i_be = self.cfg.GetClusterInfo().FillBE(instance)
5514
5515     target_node = secondary_nodes[0]
5516     # check memory requirements on the secondary node
5517     _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
5518                          instance.name, i_be[constants.BE_MEMORY],
5519                          instance.hypervisor)
5520
5521     # check bridge existance
5522     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
5523
5524     if not self.cleanup:
5525       _CheckNodeNotDrained(self.lu, target_node)
5526       result = self.rpc.call_instance_migratable(instance.primary_node,
5527                                                  instance)
5528       result.Raise("Can't migrate, please use failover",
5529                    prereq=True, ecode=errors.ECODE_STATE)
5530
5531     self.instance = instance
5532
5533   def _WaitUntilSync(self):
5534     """Poll with custom rpc for disk sync.
5535
5536     This uses our own step-based rpc call.
5537
5538     """
5539     self.feedback_fn("* wait until resync is done")
5540     all_done = False
5541     while not all_done:
5542       all_done = True
5543       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
5544                                             self.nodes_ip,
5545                                             self.instance.disks)
5546       min_percent = 100
5547       for node, nres in result.items():
5548         nres.Raise("Cannot resync disks on node %s" % node)
5549         node_done, node_percent = nres.payload
5550         all_done = all_done and node_done
5551         if node_percent is not None:
5552           min_percent = min(min_percent, node_percent)
5553       if not all_done:
5554         if min_percent < 100:
5555           self.feedback_fn("   - progress: %.1f%%" % min_percent)
5556         time.sleep(2)
5557
5558   def _EnsureSecondary(self, node):
5559     """Demote a node to secondary.
5560
5561     """
5562     self.feedback_fn("* switching node %s to secondary mode" % node)
5563
5564     for dev in self.instance.disks:
5565       self.cfg.SetDiskID(dev, node)
5566
5567     result = self.rpc.call_blockdev_close(node, self.instance.name,
5568                                           self.instance.disks)
5569     result.Raise("Cannot change disk to secondary on node %s" % node)
5570
5571   def _GoStandalone(self):
5572     """Disconnect from the network.
5573
5574     """
5575     self.feedback_fn("* changing into standalone mode")
5576     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
5577                                                self.instance.disks)
5578     for node, nres in result.items():
5579       nres.Raise("Cannot disconnect disks node %s" % node)
5580
5581   def _GoReconnect(self, multimaster):
5582     """Reconnect to the network.
5583
5584     """
5585     if multimaster:
5586       msg = "dual-master"
5587     else:
5588       msg = "single-master"
5589     self.feedback_fn("* changing disks into %s mode" % msg)
5590     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
5591                                            self.instance.disks,
5592                                            self.instance.name, multimaster)
5593     for node, nres in result.items():
5594       nres.Raise("Cannot change disks config on node %s" % node)
5595
5596   def _ExecCleanup(self):
5597     """Try to cleanup after a failed migration.
5598
5599     The cleanup is done by:
5600       - check that the instance is running only on one node
5601         (and update the config if needed)
5602       - change disks on its secondary node to secondary
5603       - wait until disks are fully synchronized
5604       - disconnect from the network
5605       - change disks into single-master mode
5606       - wait again until disks are fully synchronized
5607
5608     """
5609     instance = self.instance
5610     target_node = self.target_node
5611     source_node = self.source_node
5612
5613     # check running on only one node
5614     self.feedback_fn("* checking where the instance actually runs"
5615                      " (if this hangs, the hypervisor might be in"
5616                      " a bad state)")
5617     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
5618     for node, result in ins_l.items():
5619       result.Raise("Can't contact node %s" % node)
5620
5621     runningon_source = instance.name in ins_l[source_node].payload
5622     runningon_target = instance.name in ins_l[target_node].payload
5623
5624     if runningon_source and runningon_target:
5625       raise errors.OpExecError("Instance seems to be running on two nodes,"
5626                                " or the hypervisor is confused. You will have"
5627                                " to ensure manually that it runs only on one"
5628                                " and restart this operation.")
5629
5630     if not (runningon_source or runningon_target):
5631       raise errors.OpExecError("Instance does not seem to be running at all."
5632                                " In this case, it's safer to repair by"
5633                                " running 'gnt-instance stop' to ensure disk"
5634                                " shutdown, and then restarting it.")
5635
5636     if runningon_target:
5637       # the migration has actually succeeded, we need to update the config
5638       self.feedback_fn("* instance running on secondary node (%s),"
5639                        " updating config" % target_node)
5640       instance.primary_node = target_node
5641       self.cfg.Update(instance, self.feedback_fn)
5642       demoted_node = source_node
5643     else:
5644       self.feedback_fn("* instance confirmed to be running on its"
5645                        " primary node (%s)" % source_node)
5646       demoted_node = target_node
5647
5648     self._EnsureSecondary(demoted_node)
5649     try:
5650       self._WaitUntilSync()
5651     except errors.OpExecError:
5652       # we ignore here errors, since if the device is standalone, it
5653       # won't be able to sync
5654       pass
5655     self._GoStandalone()
5656     self._GoReconnect(False)
5657     self._WaitUntilSync()
5658
5659     self.feedback_fn("* done")
5660
5661   def _RevertDiskStatus(self):
5662     """Try to revert the disk status after a failed migration.
5663
5664     """
5665     target_node = self.target_node
5666     try:
5667       self._EnsureSecondary(target_node)
5668       self._GoStandalone()
5669       self._GoReconnect(False)
5670       self._WaitUntilSync()
5671     except errors.OpExecError, err:
5672       self.lu.LogWarning("Migration failed and I can't reconnect the"
5673                          " drives: error '%s'\n"
5674                          "Please look and recover the instance status" %
5675                          str(err))
5676
5677   def _AbortMigration(self):
5678     """Call the hypervisor code to abort a started migration.
5679
5680     """
5681     instance = self.instance
5682     target_node = self.target_node
5683     migration_info = self.migration_info
5684
5685     abort_result = self.rpc.call_finalize_migration(target_node,
5686                                                     instance,
5687                                                     migration_info,
5688                                                     False)
5689     abort_msg = abort_result.fail_msg
5690     if abort_msg:
5691       logging.error("Aborting migration failed on target node %s: %s",
5692                     target_node, abort_msg)
5693       # Don't raise an exception here, as we stil have to try to revert the
5694       # disk status, even if this step failed.
5695
5696   def _ExecMigration(self):
5697     """Migrate an instance.
5698
5699     The migrate is done by:
5700       - change the disks into dual-master mode
5701       - wait until disks are fully synchronized again
5702       - migrate the instance
5703       - change disks on the new secondary node (the old primary) to secondary
5704       - wait until disks are fully synchronized
5705       - change disks into single-master mode
5706
5707     """
5708     instance = self.instance
5709     target_node = self.target_node
5710     source_node = self.source_node
5711
5712     self.feedback_fn("* checking disk consistency between source and target")
5713     for dev in instance.disks:
5714       if not _CheckDiskConsistency(self.lu, dev, target_node, False):
5715         raise errors.OpExecError("Disk %s is degraded or not fully"
5716                                  " synchronized on target node,"
5717                                  " aborting migrate." % dev.iv_name)
5718
5719     # First get the migration information from the remote node
5720     result = self.rpc.call_migration_info(source_node, instance)
5721     msg = result.fail_msg
5722     if msg:
5723       log_err = ("Failed fetching source migration information from %s: %s" %
5724                  (source_node, msg))
5725       logging.error(log_err)
5726       raise errors.OpExecError(log_err)
5727
5728     self.migration_info = migration_info = result.payload
5729
5730     # Then switch the disks to master/master mode
5731     self._EnsureSecondary(target_node)
5732     self._GoStandalone()
5733     self._GoReconnect(True)
5734     self._WaitUntilSync()
5735
5736     self.feedback_fn("* preparing %s to accept the instance" % target_node)
5737     result = self.rpc.call_accept_instance(target_node,
5738                                            instance,
5739                                            migration_info,
5740                                            self.nodes_ip[target_node])
5741
5742     msg = result.fail_msg
5743     if msg:
5744       logging.error("Instance pre-migration failed, trying to revert"
5745                     " disk status: %s", msg)
5746       self.feedback_fn("Pre-migration failed, aborting")
5747       self._AbortMigration()
5748       self._RevertDiskStatus()
5749       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
5750                                (instance.name, msg))
5751
5752     self.feedback_fn("* migrating instance to %s" % target_node)
5753     time.sleep(10)
5754     result = self.rpc.call_instance_migrate(source_node, instance,
5755                                             self.nodes_ip[target_node],
5756                                             self.live)
5757     msg = result.fail_msg
5758     if msg:
5759       logging.error("Instance migration failed, trying to revert"
5760                     " disk status: %s", msg)
5761       self.feedback_fn("Migration failed, aborting")
5762       self._AbortMigration()
5763       self._RevertDiskStatus()
5764       raise errors.OpExecError("Could not migrate instance %s: %s" %
5765                                (instance.name, msg))
5766     time.sleep(10)
5767
5768     instance.primary_node = target_node
5769     # distribute new instance config to the other nodes
5770     self.cfg.Update(instance, self.feedback_fn)
5771
5772     result = self.rpc.call_finalize_migration(target_node,
5773                                               instance,
5774                                               migration_info,
5775                                               True)
5776     msg = result.fail_msg
5777     if msg:
5778       logging.error("Instance migration succeeded, but finalization failed:"
5779                     " %s", msg)
5780       raise errors.OpExecError("Could not finalize instance migration: %s" %
5781                                msg)
5782
5783     self._EnsureSecondary(source_node)
5784     self._WaitUntilSync()
5785     self._GoStandalone()
5786     self._GoReconnect(False)
5787     self._WaitUntilSync()
5788
5789     self.feedback_fn("* done")
5790
5791   def Exec(self, feedback_fn):
5792     """Perform the migration.
5793
5794     """
5795     feedback_fn("Migrating instance %s" % self.instance.name)
5796
5797     self.feedback_fn = feedback_fn
5798
5799     self.source_node = self.instance.primary_node
5800     self.target_node = self.instance.secondary_nodes[0]
5801     self.all_nodes = [self.source_node, self.target_node]
5802     self.nodes_ip = {
5803       self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
5804       self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
5805       }
5806
5807     if self.cleanup:
5808       return self._ExecCleanup()
5809     else:
5810       return self._ExecMigration()
5811
5812
5813 def _CreateBlockDev(lu, node, instance, device, force_create,
5814                     info, force_open):
5815   """Create a tree of block devices on a given node.
5816
5817   If this device type has to be created on secondaries, create it and
5818   all its children.
5819
5820   If not, just recurse to children keeping the same 'force' value.
5821
5822   @param lu: the lu on whose behalf we execute
5823   @param node: the node on which to create the device
5824   @type instance: L{objects.Instance}
5825   @param instance: the instance which owns the device
5826   @type device: L{objects.Disk}
5827   @param device: the device to create
5828   @type force_create: boolean
5829   @param force_create: whether to force creation of this device; this
5830       will be change to True whenever we find a device which has
5831       CreateOnSecondary() attribute
5832   @param info: the extra 'metadata' we should attach to the device
5833       (this will be represented as a LVM tag)
5834   @type force_open: boolean
5835   @param force_open: this parameter will be passes to the
5836       L{backend.BlockdevCreate} function where it specifies
5837       whether we run on primary or not, and it affects both
5838       the child assembly and the device own Open() execution
5839
5840   """
5841   if device.CreateOnSecondary():
5842     force_create = True
5843
5844   if device.children:
5845     for child in device.children:
5846       _CreateBlockDev(lu, node, instance, child, force_create,
5847                       info, force_open)
5848
5849   if not force_create:
5850     return
5851
5852   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
5853
5854
5855 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
5856   """Create a single block device on a given node.
5857
5858   This will not recurse over children of the device, so they must be
5859   created in advance.
5860
5861   @param lu: the lu on whose behalf we execute
5862   @param node: the node on which to create the device
5863   @type instance: L{objects.Instance}
5864   @param instance: the instance which owns the device
5865   @type device: L{objects.Disk}
5866   @param device: the device to create
5867   @param info: the extra 'metadata' we should attach to the device
5868       (this will be represented as a LVM tag)
5869   @type force_open: boolean
5870   @param force_open: this parameter will be passes to the
5871       L{backend.BlockdevCreate} function where it specifies
5872       whether we run on primary or not, and it affects both
5873       the child assembly and the device own Open() execution
5874
5875   """
5876   lu.cfg.SetDiskID(device, node)
5877   result = lu.rpc.call_blockdev_create(node, device, device.size,
5878                                        instance.name, force_open, info)
5879   result.Raise("Can't create block device %s on"
5880                " node %s for instance %s" % (device, node, instance.name))
5881   if device.physical_id is None:
5882     device.physical_id = result.payload
5883
5884
5885 def _GenerateUniqueNames(lu, exts):
5886   """Generate a suitable LV name.
5887
5888   This will generate a logical volume name for the given instance.
5889
5890   """
5891   results = []
5892   for val in exts:
5893     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
5894     results.append("%s%s" % (new_id, val))
5895   return results
5896
5897
5898 def _GenerateDRBD8Branch(lu, primary, secondary, size, names, iv_name,
5899                          p_minor, s_minor):
5900   """Generate a drbd8 device complete with its children.
5901
5902   """
5903   port = lu.cfg.AllocatePort()
5904   vgname = lu.cfg.GetVGName()
5905   shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
5906   dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
5907                           logical_id=(vgname, names[0]))
5908   dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
5909                           logical_id=(vgname, names[1]))
5910   drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
5911                           logical_id=(primary, secondary, port,
5912                                       p_minor, s_minor,
5913                                       shared_secret),
5914                           children=[dev_data, dev_meta],
5915                           iv_name=iv_name)
5916   return drbd_dev
5917
5918
5919 def _GenerateDiskTemplate(lu, template_name,
5920                           instance_name, primary_node,
5921                           secondary_nodes, disk_info,
5922                           file_storage_dir, file_driver,
5923                           base_index):
5924   """Generate the entire disk layout for a given template type.
5925
5926   """
5927   #TODO: compute space requirements
5928
5929   vgname = lu.cfg.GetVGName()
5930   disk_count = len(disk_info)
5931   disks = []
5932   if template_name == constants.DT_DISKLESS:
5933     pass
5934   elif template_name == constants.DT_PLAIN:
5935     if len(secondary_nodes) != 0:
5936       raise errors.ProgrammerError("Wrong template configuration")
5937
5938     names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5939                                       for i in range(disk_count)])
5940     for idx, disk in enumerate(disk_info):
5941       disk_index = idx + base_index
5942       disk_dev = objects.Disk(dev_type=constants.LD_LV, size=disk["size"],
5943                               logical_id=(vgname, names[idx]),
5944                               iv_name="disk/%d" % disk_index,
5945                               mode=disk["mode"])
5946       disks.append(disk_dev)
5947   elif template_name == constants.DT_DRBD8:
5948     if len(secondary_nodes) != 1:
5949       raise errors.ProgrammerError("Wrong template configuration")
5950     remote_node = secondary_nodes[0]
5951     minors = lu.cfg.AllocateDRBDMinor(
5952       [primary_node, remote_node] * len(disk_info), instance_name)
5953
5954     names = []
5955     for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
5956                                                for i in range(disk_count)]):
5957       names.append(lv_prefix + "_data")
5958       names.append(lv_prefix + "_meta")
5959     for idx, disk in enumerate(disk_info):
5960       disk_index = idx + base_index
5961       disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
5962                                       disk["size"], names[idx*2:idx*2+2],
5963                                       "disk/%d" % disk_index,
5964                                       minors[idx*2], minors[idx*2+1])
5965       disk_dev.mode = disk["mode"]
5966       disks.append(disk_dev)
5967   elif template_name == constants.DT_FILE:
5968     if len(secondary_nodes) != 0:
5969       raise errors.ProgrammerError("Wrong template configuration")
5970
5971     _RequireFileStorage()
5972
5973     for idx, disk in enumerate(disk_info):
5974       disk_index = idx + base_index
5975       disk_dev = objects.Disk(dev_type=constants.LD_FILE, size=disk["size"],
5976                               iv_name="disk/%d" % disk_index,
5977                               logical_id=(file_driver,
5978                                           "%s/disk%d" % (file_storage_dir,
5979                                                          disk_index)),
5980                               mode=disk["mode"])
5981       disks.append(disk_dev)
5982   else:
5983     raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
5984   return disks
5985
5986
5987 def _GetInstanceInfoText(instance):
5988   """Compute that text that should be added to the disk's metadata.
5989
5990   """
5991   return "originstname+%s" % instance.name
5992
5993
5994 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
5995   """Create all disks for an instance.
5996
5997   This abstracts away some work from AddInstance.
5998
5999   @type lu: L{LogicalUnit}
6000   @param lu: the logical unit on whose behalf we execute
6001   @type instance: L{objects.Instance}
6002   @param instance: the instance whose disks we should create
6003   @type to_skip: list
6004   @param to_skip: list of indices to skip
6005   @type target_node: string
6006   @param target_node: if passed, overrides the target node for creation
6007   @rtype: boolean
6008   @return: the success of the creation
6009
6010   """
6011   info = _GetInstanceInfoText(instance)
6012   if target_node is None:
6013     pnode = instance.primary_node
6014     all_nodes = instance.all_nodes
6015   else:
6016     pnode = target_node
6017     all_nodes = [pnode]
6018
6019   if instance.disk_template == constants.DT_FILE:
6020     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6021     result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
6022
6023     result.Raise("Failed to create directory '%s' on"
6024                  " node %s" % (file_storage_dir, pnode))
6025
6026   # Note: this needs to be kept in sync with adding of disks in
6027   # LUSetInstanceParams
6028   for idx, device in enumerate(instance.disks):
6029     if to_skip and idx in to_skip:
6030       continue
6031     logging.info("Creating volume %s for instance %s",
6032                  device.iv_name, instance.name)
6033     #HARDCODE
6034     for node in all_nodes:
6035       f_create = node == pnode
6036       _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
6037
6038
6039 def _RemoveDisks(lu, instance, target_node=None):
6040   """Remove all disks for an instance.
6041
6042   This abstracts away some work from `AddInstance()` and
6043   `RemoveInstance()`. Note that in case some of the devices couldn't
6044   be removed, the removal will continue with the other ones (compare
6045   with `_CreateDisks()`).
6046
6047   @type lu: L{LogicalUnit}
6048   @param lu: the logical unit on whose behalf we execute
6049   @type instance: L{objects.Instance}
6050   @param instance: the instance whose disks we should remove
6051   @type target_node: string
6052   @param target_node: used to override the node on which to remove the disks
6053   @rtype: boolean
6054   @return: the success of the removal
6055
6056   """
6057   logging.info("Removing block devices for instance %s", instance.name)
6058
6059   all_result = True
6060   for device in instance.disks:
6061     if target_node:
6062       edata = [(target_node, device)]
6063     else:
6064       edata = device.ComputeNodeTree(instance.primary_node)
6065     for node, disk in edata:
6066       lu.cfg.SetDiskID(disk, node)
6067       msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
6068       if msg:
6069         lu.LogWarning("Could not remove block device %s on node %s,"
6070                       " continuing anyway: %s", device.iv_name, node, msg)
6071         all_result = False
6072
6073   if instance.disk_template == constants.DT_FILE:
6074     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
6075     if target_node:
6076       tgt = target_node
6077     else:
6078       tgt = instance.primary_node
6079     result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
6080     if result.fail_msg:
6081       lu.LogWarning("Could not remove directory '%s' on node %s: %s",
6082                     file_storage_dir, instance.primary_node, result.fail_msg)
6083       all_result = False
6084
6085   return all_result
6086
6087
6088 def _ComputeDiskSize(disk_template, disks):
6089   """Compute disk size requirements in the volume group
6090
6091   """
6092   # Required free disk space as a function of disk and swap space
6093   req_size_dict = {
6094     constants.DT_DISKLESS: None,
6095     constants.DT_PLAIN: sum(d["size"] for d in disks),
6096     # 128 MB are added for drbd metadata for each disk
6097     constants.DT_DRBD8: sum(d["size"] + 128 for d in disks),
6098     constants.DT_FILE: None,
6099   }
6100
6101   if disk_template not in req_size_dict:
6102     raise errors.ProgrammerError("Disk template '%s' size requirement"
6103                                  " is unknown" %  disk_template)
6104
6105   return req_size_dict[disk_template]
6106
6107
6108 def _CheckHVParams(lu, nodenames, hvname, hvparams):
6109   """Hypervisor parameter validation.
6110
6111   This function abstract the hypervisor parameter validation to be
6112   used in both instance create and instance modify.
6113
6114   @type lu: L{LogicalUnit}
6115   @param lu: the logical unit for which we check
6116   @type nodenames: list
6117   @param nodenames: the list of nodes on which we should check
6118   @type hvname: string
6119   @param hvname: the name of the hypervisor we should use
6120   @type hvparams: dict
6121   @param hvparams: the parameters which we need to check
6122   @raise errors.OpPrereqError: if the parameters are not valid
6123
6124   """
6125   hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
6126                                                   hvname,
6127                                                   hvparams)
6128   for node in nodenames:
6129     info = hvinfo[node]
6130     if info.offline:
6131       continue
6132     info.Raise("Hypervisor parameter validation failed on node %s" % node)
6133
6134
6135 def _CheckOSParams(lu, required, nodenames, osname, osparams):
6136   """OS parameters validation.
6137
6138   @type lu: L{LogicalUnit}
6139   @param lu: the logical unit for which we check
6140   @type required: boolean
6141   @param required: whether the validation should fail if the OS is not
6142       found
6143   @type nodenames: list
6144   @param nodenames: the list of nodes on which we should check
6145   @type osname: string
6146   @param osname: the name of the hypervisor we should use
6147   @type osparams: dict
6148   @param osparams: the parameters which we need to check
6149   @raise errors.OpPrereqError: if the parameters are not valid
6150
6151   """
6152   result = lu.rpc.call_os_validate(required, nodenames, osname,
6153                                    [constants.OS_VALIDATE_PARAMETERS],
6154                                    osparams)
6155   for node, nres in result.items():
6156     # we don't check for offline cases since this should be run only
6157     # against the master node and/or an instance's nodes
6158     nres.Raise("OS Parameters validation failed on node %s" % node)
6159     if not nres.payload:
6160       lu.LogInfo("OS %s not found on node %s, validation skipped",
6161                  osname, node)
6162
6163
6164 class LUCreateInstance(LogicalUnit):
6165   """Create an instance.
6166
6167   """
6168   HPATH = "instance-add"
6169   HTYPE = constants.HTYPE_INSTANCE
6170   _OP_REQP = ["instance_name", "disks",
6171               "mode", "start",
6172               "wait_for_sync", "ip_check", "nics",
6173               "hvparams", "beparams", "osparams"]
6174   REQ_BGL = False
6175
6176   def CheckArguments(self):
6177     """Check arguments.
6178
6179     """
6180     # set optional parameters to none if they don't exist
6181     for attr in ["pnode", "snode", "iallocator", "hypervisor",
6182                  "disk_template", "identify_defaults"]:
6183       if not hasattr(self.op, attr):
6184         setattr(self.op, attr, None)
6185
6186     # do not require name_check to ease forward/backward compatibility
6187     # for tools
6188     if not hasattr(self.op, "name_check"):
6189       self.op.name_check = True
6190     if not hasattr(self.op, "no_install"):
6191       self.op.no_install = False
6192     if self.op.no_install and self.op.start:
6193       self.LogInfo("No-installation mode selected, disabling startup")
6194       self.op.start = False
6195     # validate/normalize the instance name
6196     self.op.instance_name = utils.HostInfo.NormalizeName(self.op.instance_name)
6197     if self.op.ip_check and not self.op.name_check:
6198       # TODO: make the ip check more flexible and not depend on the name check
6199       raise errors.OpPrereqError("Cannot do ip checks without a name check",
6200                                  errors.ECODE_INVAL)
6201
6202     # check nics' parameter names
6203     for nic in self.op.nics:
6204       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
6205
6206     # check disks. parameter names and consistent adopt/no-adopt strategy
6207     has_adopt = has_no_adopt = False
6208     for disk in self.op.disks:
6209       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
6210       if "adopt" in disk:
6211         has_adopt = True
6212       else:
6213         has_no_adopt = True
6214     if has_adopt and has_no_adopt:
6215       raise errors.OpPrereqError("Either all disks are adopted or none is",
6216                                  errors.ECODE_INVAL)
6217     if has_adopt:
6218       if self.op.disk_template != constants.DT_PLAIN:
6219         raise errors.OpPrereqError("Disk adoption is only supported for the"
6220                                    " 'plain' disk template",
6221                                    errors.ECODE_INVAL)
6222       if self.op.iallocator is not None:
6223         raise errors.OpPrereqError("Disk adoption not allowed with an"
6224                                    " iallocator script", errors.ECODE_INVAL)
6225       if self.op.mode == constants.INSTANCE_IMPORT:
6226         raise errors.OpPrereqError("Disk adoption not allowed for"
6227                                    " instance import", errors.ECODE_INVAL)
6228
6229     self.adopt_disks = has_adopt
6230
6231     # verify creation mode
6232     if self.op.mode not in constants.INSTANCE_CREATE_MODES:
6233       raise errors.OpPrereqError("Invalid instance creation mode '%s'" %
6234                                  self.op.mode, errors.ECODE_INVAL)
6235
6236     # instance name verification
6237     if self.op.name_check:
6238       self.hostname1 = utils.GetHostInfo(self.op.instance_name)
6239       self.op.instance_name = self.hostname1.name
6240       # used in CheckPrereq for ip ping check
6241       self.check_ip = self.hostname1.ip
6242     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6243       raise errors.OpPrereqError("Remote imports require names to be checked" %
6244                                  errors.ECODE_INVAL)
6245     else:
6246       self.check_ip = None
6247
6248     # file storage checks
6249     if (self.op.file_driver and
6250         not self.op.file_driver in constants.FILE_DRIVER):
6251       raise errors.OpPrereqError("Invalid file driver name '%s'" %
6252                                  self.op.file_driver, errors.ECODE_INVAL)
6253
6254     if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
6255       raise errors.OpPrereqError("File storage directory path not absolute",
6256                                  errors.ECODE_INVAL)
6257
6258     ### Node/iallocator related checks
6259     if [self.op.iallocator, self.op.pnode].count(None) != 1:
6260       raise errors.OpPrereqError("One and only one of iallocator and primary"
6261                                  " node must be given",
6262                                  errors.ECODE_INVAL)
6263
6264     self._cds = _GetClusterDomainSecret()
6265
6266     if self.op.mode == constants.INSTANCE_IMPORT:
6267       # On import force_variant must be True, because if we forced it at
6268       # initial install, our only chance when importing it back is that it
6269       # works again!
6270       self.op.force_variant = True
6271
6272       if self.op.no_install:
6273         self.LogInfo("No-installation mode has no effect during import")
6274
6275     elif self.op.mode == constants.INSTANCE_CREATE:
6276       if getattr(self.op, "os_type", None) is None:
6277         raise errors.OpPrereqError("No guest OS specified",
6278                                    errors.ECODE_INVAL)
6279       self.op.force_variant = getattr(self.op, "force_variant", False)
6280       if self.op.disk_template is None:
6281         raise errors.OpPrereqError("No disk template specified",
6282                                    errors.ECODE_INVAL)
6283
6284     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
6285       # Check handshake to ensure both clusters have the same domain secret
6286       src_handshake = getattr(self.op, "source_handshake", None)
6287       if not src_handshake:
6288         raise errors.OpPrereqError("Missing source handshake",
6289                                    errors.ECODE_INVAL)
6290
6291       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
6292                                                            src_handshake)
6293       if errmsg:
6294         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
6295                                    errors.ECODE_INVAL)
6296
6297       # Load and check source CA
6298       self.source_x509_ca_pem = getattr(self.op, "source_x509_ca", None)
6299       if not self.source_x509_ca_pem:
6300         raise errors.OpPrereqError("Missing source X509 CA",
6301                                    errors.ECODE_INVAL)
6302
6303       try:
6304         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
6305                                                     self._cds)
6306       except OpenSSL.crypto.Error, err:
6307         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
6308                                    (err, ), errors.ECODE_INVAL)
6309
6310       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
6311       if errcode is not None:
6312         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
6313                                    errors.ECODE_INVAL)
6314
6315       self.source_x509_ca = cert
6316
6317       src_instance_name = getattr(self.op, "source_instance_name", None)
6318       if not src_instance_name:
6319         raise errors.OpPrereqError("Missing source instance name",
6320                                    errors.ECODE_INVAL)
6321
6322       self.source_instance_name = \
6323         utils.GetHostInfo(utils.HostInfo.NormalizeName(src_instance_name)).name
6324
6325     else:
6326       raise errors.OpPrereqError("Invalid instance creation mode %r" %
6327                                  self.op.mode, errors.ECODE_INVAL)
6328
6329   def ExpandNames(self):
6330     """ExpandNames for CreateInstance.
6331
6332     Figure out the right locks for instance creation.
6333
6334     """
6335     self.needed_locks = {}
6336
6337     instance_name = self.op.instance_name
6338     # this is just a preventive check, but someone might still add this
6339     # instance in the meantime, and creation will fail at lock-add time
6340     if instance_name in self.cfg.GetInstanceList():
6341       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6342                                  instance_name, errors.ECODE_EXISTS)
6343
6344     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
6345
6346     if self.op.iallocator:
6347       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6348     else:
6349       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
6350       nodelist = [self.op.pnode]
6351       if self.op.snode is not None:
6352         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
6353         nodelist.append(self.op.snode)
6354       self.needed_locks[locking.LEVEL_NODE] = nodelist
6355
6356     # in case of import lock the source node too
6357     if self.op.mode == constants.INSTANCE_IMPORT:
6358       src_node = getattr(self.op, "src_node", None)
6359       src_path = getattr(self.op, "src_path", None)
6360
6361       if src_path is None:
6362         self.op.src_path = src_path = self.op.instance_name
6363
6364       if src_node is None:
6365         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6366         self.op.src_node = None
6367         if os.path.isabs(src_path):
6368           raise errors.OpPrereqError("Importing an instance from an absolute"
6369                                      " path requires a source node option.",
6370                                      errors.ECODE_INVAL)
6371       else:
6372         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
6373         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
6374           self.needed_locks[locking.LEVEL_NODE].append(src_node)
6375         if not os.path.isabs(src_path):
6376           self.op.src_path = src_path = \
6377             utils.PathJoin(constants.EXPORT_DIR, src_path)
6378
6379   def _RunAllocator(self):
6380     """Run the allocator based on input opcode.
6381
6382     """
6383     nics = [n.ToDict() for n in self.nics]
6384     ial = IAllocator(self.cfg, self.rpc,
6385                      mode=constants.IALLOCATOR_MODE_ALLOC,
6386                      name=self.op.instance_name,
6387                      disk_template=self.op.disk_template,
6388                      tags=[],
6389                      os=self.op.os_type,
6390                      vcpus=self.be_full[constants.BE_VCPUS],
6391                      mem_size=self.be_full[constants.BE_MEMORY],
6392                      disks=self.disks,
6393                      nics=nics,
6394                      hypervisor=self.op.hypervisor,
6395                      )
6396
6397     ial.Run(self.op.iallocator)
6398
6399     if not ial.success:
6400       raise errors.OpPrereqError("Can't compute nodes using"
6401                                  " iallocator '%s': %s" %
6402                                  (self.op.iallocator, ial.info),
6403                                  errors.ECODE_NORES)
6404     if len(ial.result) != ial.required_nodes:
6405       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6406                                  " of nodes (%s), required %s" %
6407                                  (self.op.iallocator, len(ial.result),
6408                                   ial.required_nodes), errors.ECODE_FAULT)
6409     self.op.pnode = ial.result[0]
6410     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6411                  self.op.instance_name, self.op.iallocator,
6412                  utils.CommaJoin(ial.result))
6413     if ial.required_nodes == 2:
6414       self.op.snode = ial.result[1]
6415
6416   def BuildHooksEnv(self):
6417     """Build hooks env.
6418
6419     This runs on master, primary and secondary nodes of the instance.
6420
6421     """
6422     env = {
6423       "ADD_MODE": self.op.mode,
6424       }
6425     if self.op.mode == constants.INSTANCE_IMPORT:
6426       env["SRC_NODE"] = self.op.src_node
6427       env["SRC_PATH"] = self.op.src_path
6428       env["SRC_IMAGES"] = self.src_images
6429
6430     env.update(_BuildInstanceHookEnv(
6431       name=self.op.instance_name,
6432       primary_node=self.op.pnode,
6433       secondary_nodes=self.secondaries,
6434       status=self.op.start,
6435       os_type=self.op.os_type,
6436       memory=self.be_full[constants.BE_MEMORY],
6437       vcpus=self.be_full[constants.BE_VCPUS],
6438       nics=_NICListToTuple(self, self.nics),
6439       disk_template=self.op.disk_template,
6440       disks=[(d["size"], d["mode"]) for d in self.disks],
6441       bep=self.be_full,
6442       hvp=self.hv_full,
6443       hypervisor_name=self.op.hypervisor,
6444     ))
6445
6446     nl = ([self.cfg.GetMasterNode(), self.op.pnode] +
6447           self.secondaries)
6448     return env, nl, nl
6449
6450   def _ReadExportInfo(self):
6451     """Reads the export information from disk.
6452
6453     It will override the opcode source node and path with the actual
6454     information, if these two were not specified before.
6455
6456     @return: the export information
6457
6458     """
6459     assert self.op.mode == constants.INSTANCE_IMPORT
6460
6461     src_node = self.op.src_node
6462     src_path = self.op.src_path
6463
6464     if src_node is None:
6465       locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
6466       exp_list = self.rpc.call_export_list(locked_nodes)
6467       found = False
6468       for node in exp_list:
6469         if exp_list[node].fail_msg:
6470           continue
6471         if src_path in exp_list[node].payload:
6472           found = True
6473           self.op.src_node = src_node = node
6474           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
6475                                                        src_path)
6476           break
6477       if not found:
6478         raise errors.OpPrereqError("No export found for relative path %s" %
6479                                     src_path, errors.ECODE_INVAL)
6480
6481     _CheckNodeOnline(self, src_node)
6482     result = self.rpc.call_export_info(src_node, src_path)
6483     result.Raise("No export or invalid export found in dir %s" % src_path)
6484
6485     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
6486     if not export_info.has_section(constants.INISECT_EXP):
6487       raise errors.ProgrammerError("Corrupted export config",
6488                                    errors.ECODE_ENVIRON)
6489
6490     ei_version = export_info.get(constants.INISECT_EXP, "version")
6491     if (int(ei_version) != constants.EXPORT_VERSION):
6492       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
6493                                  (ei_version, constants.EXPORT_VERSION),
6494                                  errors.ECODE_ENVIRON)
6495     return export_info
6496
6497   def _ReadExportParams(self, einfo):
6498     """Use export parameters as defaults.
6499
6500     In case the opcode doesn't specify (as in override) some instance
6501     parameters, then try to use them from the export information, if
6502     that declares them.
6503
6504     """
6505     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
6506
6507     if self.op.disk_template is None:
6508       if einfo.has_option(constants.INISECT_INS, "disk_template"):
6509         self.op.disk_template = einfo.get(constants.INISECT_INS,
6510                                           "disk_template")
6511       else:
6512         raise errors.OpPrereqError("No disk template specified and the export"
6513                                    " is missing the disk_template information",
6514                                    errors.ECODE_INVAL)
6515
6516     if not self.op.disks:
6517       if einfo.has_option(constants.INISECT_INS, "disk_count"):
6518         disks = []
6519         # TODO: import the disk iv_name too
6520         for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
6521           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
6522           disks.append({"size": disk_sz})
6523         self.op.disks = disks
6524       else:
6525         raise errors.OpPrereqError("No disk info specified and the export"
6526                                    " is missing the disk information",
6527                                    errors.ECODE_INVAL)
6528
6529     if (not self.op.nics and
6530         einfo.has_option(constants.INISECT_INS, "nic_count")):
6531       nics = []
6532       for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
6533         ndict = {}
6534         for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
6535           v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
6536           ndict[name] = v
6537         nics.append(ndict)
6538       self.op.nics = nics
6539
6540     if (self.op.hypervisor is None and
6541         einfo.has_option(constants.INISECT_INS, "hypervisor")):
6542       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
6543     if einfo.has_section(constants.INISECT_HYP):
6544       # use the export parameters but do not override the ones
6545       # specified by the user
6546       for name, value in einfo.items(constants.INISECT_HYP):
6547         if name not in self.op.hvparams:
6548           self.op.hvparams[name] = value
6549
6550     if einfo.has_section(constants.INISECT_BEP):
6551       # use the parameters, without overriding
6552       for name, value in einfo.items(constants.INISECT_BEP):
6553         if name not in self.op.beparams:
6554           self.op.beparams[name] = value
6555     else:
6556       # try to read the parameters old style, from the main section
6557       for name in constants.BES_PARAMETERS:
6558         if (name not in self.op.beparams and
6559             einfo.has_option(constants.INISECT_INS, name)):
6560           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
6561
6562     if einfo.has_section(constants.INISECT_OSP):
6563       # use the parameters, without overriding
6564       for name, value in einfo.items(constants.INISECT_OSP):
6565         if name not in self.op.osparams:
6566           self.op.osparams[name] = value
6567
6568   def _RevertToDefaults(self, cluster):
6569     """Revert the instance parameters to the default values.
6570
6571     """
6572     # hvparams
6573     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
6574     for name in self.op.hvparams.keys():
6575       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
6576         del self.op.hvparams[name]
6577     # beparams
6578     be_defs = cluster.SimpleFillBE({})
6579     for name in self.op.beparams.keys():
6580       if name in be_defs and be_defs[name] == self.op.beparams[name]:
6581         del self.op.beparams[name]
6582     # nic params
6583     nic_defs = cluster.SimpleFillNIC({})
6584     for nic in self.op.nics:
6585       for name in constants.NICS_PARAMETERS:
6586         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
6587           del nic[name]
6588     # osparams
6589     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
6590     for name in self.op.osparams.keys():
6591       if name in os_defs and os_defs[name] == self.op.osparams[name]:
6592         del self.op.osparams[name]
6593
6594   def CheckPrereq(self):
6595     """Check prerequisites.
6596
6597     """
6598     if self.op.mode == constants.INSTANCE_IMPORT:
6599       export_info = self._ReadExportInfo()
6600       self._ReadExportParams(export_info)
6601
6602     _CheckDiskTemplate(self.op.disk_template)
6603
6604     if (not self.cfg.GetVGName() and
6605         self.op.disk_template not in constants.DTS_NOT_LVM):
6606       raise errors.OpPrereqError("Cluster does not support lvm-based"
6607                                  " instances", errors.ECODE_STATE)
6608
6609     if self.op.hypervisor is None:
6610       self.op.hypervisor = self.cfg.GetHypervisorType()
6611
6612     cluster = self.cfg.GetClusterInfo()
6613     enabled_hvs = cluster.enabled_hypervisors
6614     if self.op.hypervisor not in enabled_hvs:
6615       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
6616                                  " cluster (%s)" % (self.op.hypervisor,
6617                                   ",".join(enabled_hvs)),
6618                                  errors.ECODE_STATE)
6619
6620     # check hypervisor parameter syntax (locally)
6621     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
6622     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
6623                                       self.op.hvparams)
6624     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
6625     hv_type.CheckParameterSyntax(filled_hvp)
6626     self.hv_full = filled_hvp
6627     # check that we don't specify global parameters on an instance
6628     _CheckGlobalHvParams(self.op.hvparams)
6629
6630     # fill and remember the beparams dict
6631     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
6632     self.be_full = cluster.SimpleFillBE(self.op.beparams)
6633
6634     # build os parameters
6635     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
6636
6637     # now that hvp/bep are in final format, let's reset to defaults,
6638     # if told to do so
6639     if self.op.identify_defaults:
6640       self._RevertToDefaults(cluster)
6641
6642     # NIC buildup
6643     self.nics = []
6644     for idx, nic in enumerate(self.op.nics):
6645       nic_mode_req = nic.get("mode", None)
6646       nic_mode = nic_mode_req
6647       if nic_mode is None:
6648         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
6649
6650       # in routed mode, for the first nic, the default ip is 'auto'
6651       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
6652         default_ip_mode = constants.VALUE_AUTO
6653       else:
6654         default_ip_mode = constants.VALUE_NONE
6655
6656       # ip validity checks
6657       ip = nic.get("ip", default_ip_mode)
6658       if ip is None or ip.lower() == constants.VALUE_NONE:
6659         nic_ip = None
6660       elif ip.lower() == constants.VALUE_AUTO:
6661         if not self.op.name_check:
6662           raise errors.OpPrereqError("IP address set to auto but name checks"
6663                                      " have been skipped. Aborting.",
6664                                      errors.ECODE_INVAL)
6665         nic_ip = self.hostname1.ip
6666       else:
6667         if not utils.IsValidIP(ip):
6668           raise errors.OpPrereqError("Given IP address '%s' doesn't look"
6669                                      " like a valid IP" % ip,
6670                                      errors.ECODE_INVAL)
6671         nic_ip = ip
6672
6673       # TODO: check the ip address for uniqueness
6674       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
6675         raise errors.OpPrereqError("Routed nic mode requires an ip address",
6676                                    errors.ECODE_INVAL)
6677
6678       # MAC address verification
6679       mac = nic.get("mac", constants.VALUE_AUTO)
6680       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6681         mac = utils.NormalizeAndValidateMac(mac)
6682
6683         try:
6684           self.cfg.ReserveMAC(mac, self.proc.GetECId())
6685         except errors.ReservationError:
6686           raise errors.OpPrereqError("MAC address %s already in use"
6687                                      " in cluster" % mac,
6688                                      errors.ECODE_NOTUNIQUE)
6689
6690       # bridge verification
6691       bridge = nic.get("bridge", None)
6692       link = nic.get("link", None)
6693       if bridge and link:
6694         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
6695                                    " at the same time", errors.ECODE_INVAL)
6696       elif bridge and nic_mode == constants.NIC_MODE_ROUTED:
6697         raise errors.OpPrereqError("Cannot pass 'bridge' on a routed nic",
6698                                    errors.ECODE_INVAL)
6699       elif bridge:
6700         link = bridge
6701
6702       nicparams = {}
6703       if nic_mode_req:
6704         nicparams[constants.NIC_MODE] = nic_mode_req
6705       if link:
6706         nicparams[constants.NIC_LINK] = link
6707
6708       check_params = cluster.SimpleFillNIC(nicparams)
6709       objects.NIC.CheckParameterSyntax(check_params)
6710       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
6711
6712     # disk checks/pre-build
6713     self.disks = []
6714     for disk in self.op.disks:
6715       mode = disk.get("mode", constants.DISK_RDWR)
6716       if mode not in constants.DISK_ACCESS_SET:
6717         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
6718                                    mode, errors.ECODE_INVAL)
6719       size = disk.get("size", None)
6720       if size is None:
6721         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
6722       try:
6723         size = int(size)
6724       except (TypeError, ValueError):
6725         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
6726                                    errors.ECODE_INVAL)
6727       new_disk = {"size": size, "mode": mode}
6728       if "adopt" in disk:
6729         new_disk["adopt"] = disk["adopt"]
6730       self.disks.append(new_disk)
6731
6732     if self.op.mode == constants.INSTANCE_IMPORT:
6733
6734       # Check that the new instance doesn't have less disks than the export
6735       instance_disks = len(self.disks)
6736       export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
6737       if instance_disks < export_disks:
6738         raise errors.OpPrereqError("Not enough disks to import."
6739                                    " (instance: %d, export: %d)" %
6740                                    (instance_disks, export_disks),
6741                                    errors.ECODE_INVAL)
6742
6743       disk_images = []
6744       for idx in range(export_disks):
6745         option = 'disk%d_dump' % idx
6746         if export_info.has_option(constants.INISECT_INS, option):
6747           # FIXME: are the old os-es, disk sizes, etc. useful?
6748           export_name = export_info.get(constants.INISECT_INS, option)
6749           image = utils.PathJoin(self.op.src_path, export_name)
6750           disk_images.append(image)
6751         else:
6752           disk_images.append(False)
6753
6754       self.src_images = disk_images
6755
6756       old_name = export_info.get(constants.INISECT_INS, 'name')
6757       try:
6758         exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
6759       except (TypeError, ValueError), err:
6760         raise errors.OpPrereqError("Invalid export file, nic_count is not"
6761                                    " an integer: %s" % str(err),
6762                                    errors.ECODE_STATE)
6763       if self.op.instance_name == old_name:
6764         for idx, nic in enumerate(self.nics):
6765           if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
6766             nic_mac_ini = 'nic%d_mac' % idx
6767             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
6768
6769     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
6770
6771     # ip ping checks (we use the same ip that was resolved in ExpandNames)
6772     if self.op.ip_check:
6773       if utils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
6774         raise errors.OpPrereqError("IP %s of instance %s already in use" %
6775                                    (self.check_ip, self.op.instance_name),
6776                                    errors.ECODE_NOTUNIQUE)
6777
6778     #### mac address generation
6779     # By generating here the mac address both the allocator and the hooks get
6780     # the real final mac address rather than the 'auto' or 'generate' value.
6781     # There is a race condition between the generation and the instance object
6782     # creation, which means that we know the mac is valid now, but we're not
6783     # sure it will be when we actually add the instance. If things go bad
6784     # adding the instance will abort because of a duplicate mac, and the
6785     # creation job will fail.
6786     for nic in self.nics:
6787       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
6788         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
6789
6790     #### allocator run
6791
6792     if self.op.iallocator is not None:
6793       self._RunAllocator()
6794
6795     #### node related checks
6796
6797     # check primary node
6798     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
6799     assert self.pnode is not None, \
6800       "Cannot retrieve locked node %s" % self.op.pnode
6801     if pnode.offline:
6802       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
6803                                  pnode.name, errors.ECODE_STATE)
6804     if pnode.drained:
6805       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
6806                                  pnode.name, errors.ECODE_STATE)
6807
6808     self.secondaries = []
6809
6810     # mirror node verification
6811     if self.op.disk_template in constants.DTS_NET_MIRROR:
6812       if self.op.snode is None:
6813         raise errors.OpPrereqError("The networked disk templates need"
6814                                    " a mirror node", errors.ECODE_INVAL)
6815       if self.op.snode == pnode.name:
6816         raise errors.OpPrereqError("The secondary node cannot be the"
6817                                    " primary node.", errors.ECODE_INVAL)
6818       _CheckNodeOnline(self, self.op.snode)
6819       _CheckNodeNotDrained(self, self.op.snode)
6820       self.secondaries.append(self.op.snode)
6821
6822     nodenames = [pnode.name] + self.secondaries
6823
6824     req_size = _ComputeDiskSize(self.op.disk_template,
6825                                 self.disks)
6826
6827     # Check lv size requirements, if not adopting
6828     if req_size is not None and not self.adopt_disks:
6829       _CheckNodesFreeDisk(self, nodenames, req_size)
6830
6831     if self.adopt_disks: # instead, we must check the adoption data
6832       all_lvs = set([i["adopt"] for i in self.disks])
6833       if len(all_lvs) != len(self.disks):
6834         raise errors.OpPrereqError("Duplicate volume names given for adoption",
6835                                    errors.ECODE_INVAL)
6836       for lv_name in all_lvs:
6837         try:
6838           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
6839         except errors.ReservationError:
6840           raise errors.OpPrereqError("LV named %s used by another instance" %
6841                                      lv_name, errors.ECODE_NOTUNIQUE)
6842
6843       node_lvs = self.rpc.call_lv_list([pnode.name],
6844                                        self.cfg.GetVGName())[pnode.name]
6845       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
6846       node_lvs = node_lvs.payload
6847       delta = all_lvs.difference(node_lvs.keys())
6848       if delta:
6849         raise errors.OpPrereqError("Missing logical volume(s): %s" %
6850                                    utils.CommaJoin(delta),
6851                                    errors.ECODE_INVAL)
6852       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
6853       if online_lvs:
6854         raise errors.OpPrereqError("Online logical volumes found, cannot"
6855                                    " adopt: %s" % utils.CommaJoin(online_lvs),
6856                                    errors.ECODE_STATE)
6857       # update the size of disk based on what is found
6858       for dsk in self.disks:
6859         dsk["size"] = int(float(node_lvs[dsk["adopt"]][0]))
6860
6861     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
6862
6863     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
6864     # check OS parameters (remotely)
6865     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
6866
6867     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
6868
6869     # memory check on primary node
6870     if self.op.start:
6871       _CheckNodeFreeMemory(self, self.pnode.name,
6872                            "creating instance %s" % self.op.instance_name,
6873                            self.be_full[constants.BE_MEMORY],
6874                            self.op.hypervisor)
6875
6876     self.dry_run_result = list(nodenames)
6877
6878   def Exec(self, feedback_fn):
6879     """Create and add the instance to the cluster.
6880
6881     """
6882     instance = self.op.instance_name
6883     pnode_name = self.pnode.name
6884
6885     ht_kind = self.op.hypervisor
6886     if ht_kind in constants.HTS_REQ_PORT:
6887       network_port = self.cfg.AllocatePort()
6888     else:
6889       network_port = None
6890
6891     if constants.ENABLE_FILE_STORAGE:
6892       # this is needed because os.path.join does not accept None arguments
6893       if self.op.file_storage_dir is None:
6894         string_file_storage_dir = ""
6895       else:
6896         string_file_storage_dir = self.op.file_storage_dir
6897
6898       # build the full file storage dir path
6899       file_storage_dir = utils.PathJoin(self.cfg.GetFileStorageDir(),
6900                                         string_file_storage_dir, instance)
6901     else:
6902       file_storage_dir = ""
6903
6904     disks = _GenerateDiskTemplate(self,
6905                                   self.op.disk_template,
6906                                   instance, pnode_name,
6907                                   self.secondaries,
6908                                   self.disks,
6909                                   file_storage_dir,
6910                                   self.op.file_driver,
6911                                   0)
6912
6913     iobj = objects.Instance(name=instance, os=self.op.os_type,
6914                             primary_node=pnode_name,
6915                             nics=self.nics, disks=disks,
6916                             disk_template=self.op.disk_template,
6917                             admin_up=False,
6918                             network_port=network_port,
6919                             beparams=self.op.beparams,
6920                             hvparams=self.op.hvparams,
6921                             hypervisor=self.op.hypervisor,
6922                             osparams=self.op.osparams,
6923                             )
6924
6925     if self.adopt_disks:
6926       # rename LVs to the newly-generated names; we need to construct
6927       # 'fake' LV disks with the old data, plus the new unique_id
6928       tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
6929       rename_to = []
6930       for t_dsk, a_dsk in zip (tmp_disks, self.disks):
6931         rename_to.append(t_dsk.logical_id)
6932         t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk["adopt"])
6933         self.cfg.SetDiskID(t_dsk, pnode_name)
6934       result = self.rpc.call_blockdev_rename(pnode_name,
6935                                              zip(tmp_disks, rename_to))
6936       result.Raise("Failed to rename adoped LVs")
6937     else:
6938       feedback_fn("* creating instance disks...")
6939       try:
6940         _CreateDisks(self, iobj)
6941       except errors.OpExecError:
6942         self.LogWarning("Device creation failed, reverting...")
6943         try:
6944           _RemoveDisks(self, iobj)
6945         finally:
6946           self.cfg.ReleaseDRBDMinors(instance)
6947           raise
6948
6949     feedback_fn("adding instance %s to cluster config" % instance)
6950
6951     self.cfg.AddInstance(iobj, self.proc.GetECId())
6952
6953     # Declare that we don't want to remove the instance lock anymore, as we've
6954     # added the instance to the config
6955     del self.remove_locks[locking.LEVEL_INSTANCE]
6956     # Unlock all the nodes
6957     if self.op.mode == constants.INSTANCE_IMPORT:
6958       nodes_keep = [self.op.src_node]
6959       nodes_release = [node for node in self.acquired_locks[locking.LEVEL_NODE]
6960                        if node != self.op.src_node]
6961       self.context.glm.release(locking.LEVEL_NODE, nodes_release)
6962       self.acquired_locks[locking.LEVEL_NODE] = nodes_keep
6963     else:
6964       self.context.glm.release(locking.LEVEL_NODE)
6965       del self.acquired_locks[locking.LEVEL_NODE]
6966
6967     if self.op.wait_for_sync:
6968       disk_abort = not _WaitForSync(self, iobj)
6969     elif iobj.disk_template in constants.DTS_NET_MIRROR:
6970       # make sure the disks are not degraded (still sync-ing is ok)
6971       time.sleep(15)
6972       feedback_fn("* checking mirrors status")
6973       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
6974     else:
6975       disk_abort = False
6976
6977     if disk_abort:
6978       _RemoveDisks(self, iobj)
6979       self.cfg.RemoveInstance(iobj.name)
6980       # Make sure the instance lock gets removed
6981       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
6982       raise errors.OpExecError("There are some degraded disks for"
6983                                " this instance")
6984
6985     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
6986       if self.op.mode == constants.INSTANCE_CREATE:
6987         if not self.op.no_install:
6988           feedback_fn("* running the instance OS create scripts...")
6989           # FIXME: pass debug option from opcode to backend
6990           result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
6991                                                  self.op.debug_level)
6992           result.Raise("Could not add os for instance %s"
6993                        " on node %s" % (instance, pnode_name))
6994
6995       elif self.op.mode == constants.INSTANCE_IMPORT:
6996         feedback_fn("* running the instance OS import scripts...")
6997
6998         transfers = []
6999
7000         for idx, image in enumerate(self.src_images):
7001           if not image:
7002             continue
7003
7004           # FIXME: pass debug option from opcode to backend
7005           dt = masterd.instance.DiskTransfer("disk/%s" % idx,
7006                                              constants.IEIO_FILE, (image, ),
7007                                              constants.IEIO_SCRIPT,
7008                                              (iobj.disks[idx], idx),
7009                                              None)
7010           transfers.append(dt)
7011
7012         import_result = \
7013           masterd.instance.TransferInstanceData(self, feedback_fn,
7014                                                 self.op.src_node, pnode_name,
7015                                                 self.pnode.secondary_ip,
7016                                                 iobj, transfers)
7017         if not compat.all(import_result):
7018           self.LogWarning("Some disks for instance %s on node %s were not"
7019                           " imported successfully" % (instance, pnode_name))
7020
7021       elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7022         feedback_fn("* preparing remote import...")
7023         connect_timeout = constants.RIE_CONNECT_TIMEOUT
7024         timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
7025
7026         disk_results = masterd.instance.RemoteImport(self, feedback_fn, iobj,
7027                                                      self.source_x509_ca,
7028                                                      self._cds, timeouts)
7029         if not compat.all(disk_results):
7030           # TODO: Should the instance still be started, even if some disks
7031           # failed to import (valid for local imports, too)?
7032           self.LogWarning("Some disks for instance %s on node %s were not"
7033                           " imported successfully" % (instance, pnode_name))
7034
7035         # Run rename script on newly imported instance
7036         assert iobj.name == instance
7037         feedback_fn("Running rename script for %s" % instance)
7038         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
7039                                                    self.source_instance_name,
7040                                                    self.op.debug_level)
7041         if result.fail_msg:
7042           self.LogWarning("Failed to run rename script for %s on node"
7043                           " %s: %s" % (instance, pnode_name, result.fail_msg))
7044
7045       else:
7046         # also checked in the prereq part
7047         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
7048                                      % self.op.mode)
7049
7050     if self.op.start:
7051       iobj.admin_up = True
7052       self.cfg.Update(iobj, feedback_fn)
7053       logging.info("Starting instance %s on node %s", instance, pnode_name)
7054       feedback_fn("* starting instance...")
7055       result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
7056       result.Raise("Could not start instance")
7057
7058     return list(iobj.all_nodes)
7059
7060
7061 class LUConnectConsole(NoHooksLU):
7062   """Connect to an instance's console.
7063
7064   This is somewhat special in that it returns the command line that
7065   you need to run on the master node in order to connect to the
7066   console.
7067
7068   """
7069   _OP_REQP = ["instance_name"]
7070   REQ_BGL = False
7071
7072   def ExpandNames(self):
7073     self._ExpandAndLockInstance()
7074
7075   def CheckPrereq(self):
7076     """Check prerequisites.
7077
7078     This checks that the instance is in the cluster.
7079
7080     """
7081     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
7082     assert self.instance is not None, \
7083       "Cannot retrieve locked instance %s" % self.op.instance_name
7084     _CheckNodeOnline(self, self.instance.primary_node)
7085
7086   def Exec(self, feedback_fn):
7087     """Connect to the console of an instance
7088
7089     """
7090     instance = self.instance
7091     node = instance.primary_node
7092
7093     node_insts = self.rpc.call_instance_list([node],
7094                                              [instance.hypervisor])[node]
7095     node_insts.Raise("Can't get node information from %s" % node)
7096
7097     if instance.name not in node_insts.payload:
7098       raise errors.OpExecError("Instance %s is not running." % instance.name)
7099
7100     logging.debug("Connecting to console of %s on %s", instance.name, node)
7101
7102     hyper = hypervisor.GetHypervisor(instance.hypervisor)
7103     cluster = self.cfg.GetClusterInfo()
7104     # beparams and hvparams are passed separately, to avoid editing the
7105     # instance and then saving the defaults in the instance itself.
7106     hvparams = cluster.FillHV(instance)
7107     beparams = cluster.FillBE(instance)
7108     console_cmd = hyper.GetShellCommandForConsole(instance, hvparams, beparams)
7109
7110     # build ssh cmdline
7111     return self.ssh.BuildCmd(node, "root", console_cmd, batch=True, tty=True)
7112
7113
7114 class LUReplaceDisks(LogicalUnit):
7115   """Replace the disks of an instance.
7116
7117   """
7118   HPATH = "mirrors-replace"
7119   HTYPE = constants.HTYPE_INSTANCE
7120   _OP_REQP = ["instance_name", "mode", "disks"]
7121   REQ_BGL = False
7122
7123   def CheckArguments(self):
7124     if not hasattr(self.op, "remote_node"):
7125       self.op.remote_node = None
7126     if not hasattr(self.op, "iallocator"):
7127       self.op.iallocator = None
7128     if not hasattr(self.op, "early_release"):
7129       self.op.early_release = False
7130
7131     TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
7132                                   self.op.iallocator)
7133
7134   def ExpandNames(self):
7135     self._ExpandAndLockInstance()
7136
7137     if self.op.iallocator is not None:
7138       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7139
7140     elif self.op.remote_node is not None:
7141       remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7142       self.op.remote_node = remote_node
7143
7144       # Warning: do not remove the locking of the new secondary here
7145       # unless DRBD8.AddChildren is changed to work in parallel;
7146       # currently it doesn't since parallel invocations of
7147       # FindUnusedMinor will conflict
7148       self.needed_locks[locking.LEVEL_NODE] = [remote_node]
7149       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7150
7151     else:
7152       self.needed_locks[locking.LEVEL_NODE] = []
7153       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
7154
7155     self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
7156                                    self.op.iallocator, self.op.remote_node,
7157                                    self.op.disks, False, self.op.early_release)
7158
7159     self.tasklets = [self.replacer]
7160
7161   def DeclareLocks(self, level):
7162     # If we're not already locking all nodes in the set we have to declare the
7163     # instance's primary/secondary nodes.
7164     if (level == locking.LEVEL_NODE and
7165         self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7166       self._LockInstancesNodes()
7167
7168   def BuildHooksEnv(self):
7169     """Build hooks env.
7170
7171     This runs on the master, the primary and all the secondaries.
7172
7173     """
7174     instance = self.replacer.instance
7175     env = {
7176       "MODE": self.op.mode,
7177       "NEW_SECONDARY": self.op.remote_node,
7178       "OLD_SECONDARY": instance.secondary_nodes[0],
7179       }
7180     env.update(_BuildInstanceHookEnvByObject(self, instance))
7181     nl = [
7182       self.cfg.GetMasterNode(),
7183       instance.primary_node,
7184       ]
7185     if self.op.remote_node is not None:
7186       nl.append(self.op.remote_node)
7187     return env, nl, nl
7188
7189
7190 class LUEvacuateNode(LogicalUnit):
7191   """Relocate the secondary instances from a node.
7192
7193   """
7194   HPATH = "node-evacuate"
7195   HTYPE = constants.HTYPE_NODE
7196   _OP_REQP = ["node_name"]
7197   REQ_BGL = False
7198
7199   def CheckArguments(self):
7200     if not hasattr(self.op, "remote_node"):
7201       self.op.remote_node = None
7202     if not hasattr(self.op, "iallocator"):
7203       self.op.iallocator = None
7204     if not hasattr(self.op, "early_release"):
7205       self.op.early_release = False
7206
7207     TLReplaceDisks.CheckArguments(constants.REPLACE_DISK_CHG,
7208                                   self.op.remote_node,
7209                                   self.op.iallocator)
7210
7211   def ExpandNames(self):
7212     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7213
7214     self.needed_locks = {}
7215
7216     # Declare node locks
7217     if self.op.iallocator is not None:
7218       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7219
7220     elif self.op.remote_node is not None:
7221       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
7222
7223       # Warning: do not remove the locking of the new secondary here
7224       # unless DRBD8.AddChildren is changed to work in parallel;
7225       # currently it doesn't since parallel invocations of
7226       # FindUnusedMinor will conflict
7227       self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
7228       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
7229
7230     else:
7231       raise errors.OpPrereqError("Invalid parameters", errors.ECODE_INVAL)
7232
7233     # Create tasklets for replacing disks for all secondary instances on this
7234     # node
7235     names = []
7236     tasklets = []
7237
7238     for inst in _GetNodeSecondaryInstances(self.cfg, self.op.node_name):
7239       logging.debug("Replacing disks for instance %s", inst.name)
7240       names.append(inst.name)
7241
7242       replacer = TLReplaceDisks(self, inst.name, constants.REPLACE_DISK_CHG,
7243                                 self.op.iallocator, self.op.remote_node, [],
7244                                 True, self.op.early_release)
7245       tasklets.append(replacer)
7246
7247     self.tasklets = tasklets
7248     self.instance_names = names
7249
7250     # Declare instance locks
7251     self.needed_locks[locking.LEVEL_INSTANCE] = self.instance_names
7252
7253   def DeclareLocks(self, level):
7254     # If we're not already locking all nodes in the set we have to declare the
7255     # instance's primary/secondary nodes.
7256     if (level == locking.LEVEL_NODE and
7257         self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
7258       self._LockInstancesNodes()
7259
7260   def BuildHooksEnv(self):
7261     """Build hooks env.
7262
7263     This runs on the master, the primary and all the secondaries.
7264
7265     """
7266     env = {
7267       "NODE_NAME": self.op.node_name,
7268       }
7269
7270     nl = [self.cfg.GetMasterNode()]
7271
7272     if self.op.remote_node is not None:
7273       env["NEW_SECONDARY"] = self.op.remote_node
7274       nl.append(self.op.remote_node)
7275
7276     return (env, nl, nl)
7277
7278
7279 class TLReplaceDisks(Tasklet):
7280   """Replaces disks for an instance.
7281
7282   Note: Locking is not within the scope of this class.
7283
7284   """
7285   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
7286                disks, delay_iallocator, early_release):
7287     """Initializes this class.
7288
7289     """
7290     Tasklet.__init__(self, lu)
7291
7292     # Parameters
7293     self.instance_name = instance_name
7294     self.mode = mode
7295     self.iallocator_name = iallocator_name
7296     self.remote_node = remote_node
7297     self.disks = disks
7298     self.delay_iallocator = delay_iallocator
7299     self.early_release = early_release
7300
7301     # Runtime data
7302     self.instance = None
7303     self.new_node = None
7304     self.target_node = None
7305     self.other_node = None
7306     self.remote_node_info = None
7307     self.node_secondary_ip = None
7308
7309   @staticmethod
7310   def CheckArguments(mode, remote_node, iallocator):
7311     """Helper function for users of this class.
7312
7313     """
7314     # check for valid parameter combination
7315     if mode == constants.REPLACE_DISK_CHG:
7316       if remote_node is None and iallocator is None:
7317         raise errors.OpPrereqError("When changing the secondary either an"
7318                                    " iallocator script must be used or the"
7319                                    " new node given", errors.ECODE_INVAL)
7320
7321       if remote_node is not None and iallocator is not None:
7322         raise errors.OpPrereqError("Give either the iallocator or the new"
7323                                    " secondary, not both", errors.ECODE_INVAL)
7324
7325     elif remote_node is not None or iallocator is not None:
7326       # Not replacing the secondary
7327       raise errors.OpPrereqError("The iallocator and new node options can"
7328                                  " only be used when changing the"
7329                                  " secondary node", errors.ECODE_INVAL)
7330
7331   @staticmethod
7332   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
7333     """Compute a new secondary node using an IAllocator.
7334
7335     """
7336     ial = IAllocator(lu.cfg, lu.rpc,
7337                      mode=constants.IALLOCATOR_MODE_RELOC,
7338                      name=instance_name,
7339                      relocate_from=relocate_from)
7340
7341     ial.Run(iallocator_name)
7342
7343     if not ial.success:
7344       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
7345                                  " %s" % (iallocator_name, ial.info),
7346                                  errors.ECODE_NORES)
7347
7348     if len(ial.result) != ial.required_nodes:
7349       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7350                                  " of nodes (%s), required %s" %
7351                                  (iallocator_name,
7352                                   len(ial.result), ial.required_nodes),
7353                                  errors.ECODE_FAULT)
7354
7355     remote_node_name = ial.result[0]
7356
7357     lu.LogInfo("Selected new secondary for instance '%s': %s",
7358                instance_name, remote_node_name)
7359
7360     return remote_node_name
7361
7362   def _FindFaultyDisks(self, node_name):
7363     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
7364                                     node_name, True)
7365
7366   def CheckPrereq(self):
7367     """Check prerequisites.
7368
7369     This checks that the instance is in the cluster.
7370
7371     """
7372     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
7373     assert instance is not None, \
7374       "Cannot retrieve locked instance %s" % self.instance_name
7375
7376     if instance.disk_template != constants.DT_DRBD8:
7377       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
7378                                  " instances", errors.ECODE_INVAL)
7379
7380     if len(instance.secondary_nodes) != 1:
7381       raise errors.OpPrereqError("The instance has a strange layout,"
7382                                  " expected one secondary but found %d" %
7383                                  len(instance.secondary_nodes),
7384                                  errors.ECODE_FAULT)
7385
7386     if not self.delay_iallocator:
7387       self._CheckPrereq2()
7388
7389   def _CheckPrereq2(self):
7390     """Check prerequisites, second part.
7391
7392     This function should always be part of CheckPrereq. It was separated and is
7393     now called from Exec because during node evacuation iallocator was only
7394     called with an unmodified cluster model, not taking planned changes into
7395     account.
7396
7397     """
7398     instance = self.instance
7399     secondary_node = instance.secondary_nodes[0]
7400
7401     if self.iallocator_name is None:
7402       remote_node = self.remote_node
7403     else:
7404       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
7405                                        instance.name, instance.secondary_nodes)
7406
7407     if remote_node is not None:
7408       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
7409       assert self.remote_node_info is not None, \
7410         "Cannot retrieve locked node %s" % remote_node
7411     else:
7412       self.remote_node_info = None
7413
7414     if remote_node == self.instance.primary_node:
7415       raise errors.OpPrereqError("The specified node is the primary node of"
7416                                  " the instance.", errors.ECODE_INVAL)
7417
7418     if remote_node == secondary_node:
7419       raise errors.OpPrereqError("The specified node is already the"
7420                                  " secondary node of the instance.",
7421                                  errors.ECODE_INVAL)
7422
7423     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
7424                                     constants.REPLACE_DISK_CHG):
7425       raise errors.OpPrereqError("Cannot specify disks to be replaced",
7426                                  errors.ECODE_INVAL)
7427
7428     if self.mode == constants.REPLACE_DISK_AUTO:
7429       faulty_primary = self._FindFaultyDisks(instance.primary_node)
7430       faulty_secondary = self._FindFaultyDisks(secondary_node)
7431
7432       if faulty_primary and faulty_secondary:
7433         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
7434                                    " one node and can not be repaired"
7435                                    " automatically" % self.instance_name,
7436                                    errors.ECODE_STATE)
7437
7438       if faulty_primary:
7439         self.disks = faulty_primary
7440         self.target_node = instance.primary_node
7441         self.other_node = secondary_node
7442         check_nodes = [self.target_node, self.other_node]
7443       elif faulty_secondary:
7444         self.disks = faulty_secondary
7445         self.target_node = secondary_node
7446         self.other_node = instance.primary_node
7447         check_nodes = [self.target_node, self.other_node]
7448       else:
7449         self.disks = []
7450         check_nodes = []
7451
7452     else:
7453       # Non-automatic modes
7454       if self.mode == constants.REPLACE_DISK_PRI:
7455         self.target_node = instance.primary_node
7456         self.other_node = secondary_node
7457         check_nodes = [self.target_node, self.other_node]
7458
7459       elif self.mode == constants.REPLACE_DISK_SEC:
7460         self.target_node = secondary_node
7461         self.other_node = instance.primary_node
7462         check_nodes = [self.target_node, self.other_node]
7463
7464       elif self.mode == constants.REPLACE_DISK_CHG:
7465         self.new_node = remote_node
7466         self.other_node = instance.primary_node
7467         self.target_node = secondary_node
7468         check_nodes = [self.new_node, self.other_node]
7469
7470         _CheckNodeNotDrained(self.lu, remote_node)
7471
7472         old_node_info = self.cfg.GetNodeInfo(secondary_node)
7473         assert old_node_info is not None
7474         if old_node_info.offline and not self.early_release:
7475           # doesn't make sense to delay the release
7476           self.early_release = True
7477           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
7478                           " early-release mode", secondary_node)
7479
7480       else:
7481         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
7482                                      self.mode)
7483
7484       # If not specified all disks should be replaced
7485       if not self.disks:
7486         self.disks = range(len(self.instance.disks))
7487
7488     for node in check_nodes:
7489       _CheckNodeOnline(self.lu, node)
7490
7491     # Check whether disks are valid
7492     for disk_idx in self.disks:
7493       instance.FindDisk(disk_idx)
7494
7495     # Get secondary node IP addresses
7496     node_2nd_ip = {}
7497
7498     for node_name in [self.target_node, self.other_node, self.new_node]:
7499       if node_name is not None:
7500         node_2nd_ip[node_name] = self.cfg.GetNodeInfo(node_name).secondary_ip
7501
7502     self.node_secondary_ip = node_2nd_ip
7503
7504   def Exec(self, feedback_fn):
7505     """Execute disk replacement.
7506
7507     This dispatches the disk replacement to the appropriate handler.
7508
7509     """
7510     if self.delay_iallocator:
7511       self._CheckPrereq2()
7512
7513     if not self.disks:
7514       feedback_fn("No disks need replacement")
7515       return
7516
7517     feedback_fn("Replacing disk(s) %s for %s" %
7518                 (utils.CommaJoin(self.disks), self.instance.name))
7519
7520     activate_disks = (not self.instance.admin_up)
7521
7522     # Activate the instance disks if we're replacing them on a down instance
7523     if activate_disks:
7524       _StartInstanceDisks(self.lu, self.instance, True)
7525
7526     try:
7527       # Should we replace the secondary node?
7528       if self.new_node is not None:
7529         fn = self._ExecDrbd8Secondary
7530       else:
7531         fn = self._ExecDrbd8DiskOnly
7532
7533       return fn(feedback_fn)
7534
7535     finally:
7536       # Deactivate the instance disks if we're replacing them on a
7537       # down instance
7538       if activate_disks:
7539         _SafeShutdownInstanceDisks(self.lu, self.instance)
7540
7541   def _CheckVolumeGroup(self, nodes):
7542     self.lu.LogInfo("Checking volume groups")
7543
7544     vgname = self.cfg.GetVGName()
7545
7546     # Make sure volume group exists on all involved nodes
7547     results = self.rpc.call_vg_list(nodes)
7548     if not results:
7549       raise errors.OpExecError("Can't list volume groups on the nodes")
7550
7551     for node in nodes:
7552       res = results[node]
7553       res.Raise("Error checking node %s" % node)
7554       if vgname not in res.payload:
7555         raise errors.OpExecError("Volume group '%s' not found on node %s" %
7556                                  (vgname, node))
7557
7558   def _CheckDisksExistence(self, nodes):
7559     # Check disk existence
7560     for idx, dev in enumerate(self.instance.disks):
7561       if idx not in self.disks:
7562         continue
7563
7564       for node in nodes:
7565         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
7566         self.cfg.SetDiskID(dev, node)
7567
7568         result = self.rpc.call_blockdev_find(node, dev)
7569
7570         msg = result.fail_msg
7571         if msg or not result.payload:
7572           if not msg:
7573             msg = "disk not found"
7574           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
7575                                    (idx, node, msg))
7576
7577   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
7578     for idx, dev in enumerate(self.instance.disks):
7579       if idx not in self.disks:
7580         continue
7581
7582       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
7583                       (idx, node_name))
7584
7585       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
7586                                    ldisk=ldisk):
7587         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
7588                                  " replace disks for instance %s" %
7589                                  (node_name, self.instance.name))
7590
7591   def _CreateNewStorage(self, node_name):
7592     vgname = self.cfg.GetVGName()
7593     iv_names = {}
7594
7595     for idx, dev in enumerate(self.instance.disks):
7596       if idx not in self.disks:
7597         continue
7598
7599       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
7600
7601       self.cfg.SetDiskID(dev, node_name)
7602
7603       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
7604       names = _GenerateUniqueNames(self.lu, lv_names)
7605
7606       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
7607                              logical_id=(vgname, names[0]))
7608       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7609                              logical_id=(vgname, names[1]))
7610
7611       new_lvs = [lv_data, lv_meta]
7612       old_lvs = dev.children
7613       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
7614
7615       # we pass force_create=True to force the LVM creation
7616       for new_lv in new_lvs:
7617         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
7618                         _GetInstanceInfoText(self.instance), False)
7619
7620     return iv_names
7621
7622   def _CheckDevices(self, node_name, iv_names):
7623     for name, (dev, _, _) in iv_names.iteritems():
7624       self.cfg.SetDiskID(dev, node_name)
7625
7626       result = self.rpc.call_blockdev_find(node_name, dev)
7627
7628       msg = result.fail_msg
7629       if msg or not result.payload:
7630         if not msg:
7631           msg = "disk not found"
7632         raise errors.OpExecError("Can't find DRBD device %s: %s" %
7633                                  (name, msg))
7634
7635       if result.payload.is_degraded:
7636         raise errors.OpExecError("DRBD device %s is degraded!" % name)
7637
7638   def _RemoveOldStorage(self, node_name, iv_names):
7639     for name, (_, old_lvs, _) in iv_names.iteritems():
7640       self.lu.LogInfo("Remove logical volumes for %s" % name)
7641
7642       for lv in old_lvs:
7643         self.cfg.SetDiskID(lv, node_name)
7644
7645         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
7646         if msg:
7647           self.lu.LogWarning("Can't remove old LV: %s" % msg,
7648                              hint="remove unused LVs manually")
7649
7650   def _ReleaseNodeLock(self, node_name):
7651     """Releases the lock for a given node."""
7652     self.lu.context.glm.release(locking.LEVEL_NODE, node_name)
7653
7654   def _ExecDrbd8DiskOnly(self, feedback_fn):
7655     """Replace a disk on the primary or secondary for DRBD 8.
7656
7657     The algorithm for replace is quite complicated:
7658
7659       1. for each disk to be replaced:
7660
7661         1. create new LVs on the target node with unique names
7662         1. detach old LVs from the drbd device
7663         1. rename old LVs to name_replaced.<time_t>
7664         1. rename new LVs to old LVs
7665         1. attach the new LVs (with the old names now) to the drbd device
7666
7667       1. wait for sync across all devices
7668
7669       1. for each modified disk:
7670
7671         1. remove old LVs (which have the name name_replaces.<time_t>)
7672
7673     Failures are not very well handled.
7674
7675     """
7676     steps_total = 6
7677
7678     # Step: check device activation
7679     self.lu.LogStep(1, steps_total, "Check device existence")
7680     self._CheckDisksExistence([self.other_node, self.target_node])
7681     self._CheckVolumeGroup([self.target_node, self.other_node])
7682
7683     # Step: check other node consistency
7684     self.lu.LogStep(2, steps_total, "Check peer consistency")
7685     self._CheckDisksConsistency(self.other_node,
7686                                 self.other_node == self.instance.primary_node,
7687                                 False)
7688
7689     # Step: create new storage
7690     self.lu.LogStep(3, steps_total, "Allocate new storage")
7691     iv_names = self._CreateNewStorage(self.target_node)
7692
7693     # Step: for each lv, detach+rename*2+attach
7694     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7695     for dev, old_lvs, new_lvs in iv_names.itervalues():
7696       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
7697
7698       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
7699                                                      old_lvs)
7700       result.Raise("Can't detach drbd from local storage on node"
7701                    " %s for device %s" % (self.target_node, dev.iv_name))
7702       #dev.children = []
7703       #cfg.Update(instance)
7704
7705       # ok, we created the new LVs, so now we know we have the needed
7706       # storage; as such, we proceed on the target node to rename
7707       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
7708       # using the assumption that logical_id == physical_id (which in
7709       # turn is the unique_id on that node)
7710
7711       # FIXME(iustin): use a better name for the replaced LVs
7712       temp_suffix = int(time.time())
7713       ren_fn = lambda d, suff: (d.physical_id[0],
7714                                 d.physical_id[1] + "_replaced-%s" % suff)
7715
7716       # Build the rename list based on what LVs exist on the node
7717       rename_old_to_new = []
7718       for to_ren in old_lvs:
7719         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
7720         if not result.fail_msg and result.payload:
7721           # device exists
7722           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
7723
7724       self.lu.LogInfo("Renaming the old LVs on the target node")
7725       result = self.rpc.call_blockdev_rename(self.target_node,
7726                                              rename_old_to_new)
7727       result.Raise("Can't rename old LVs on node %s" % self.target_node)
7728
7729       # Now we rename the new LVs to the old LVs
7730       self.lu.LogInfo("Renaming the new LVs on the target node")
7731       rename_new_to_old = [(new, old.physical_id)
7732                            for old, new in zip(old_lvs, new_lvs)]
7733       result = self.rpc.call_blockdev_rename(self.target_node,
7734                                              rename_new_to_old)
7735       result.Raise("Can't rename new LVs on node %s" % self.target_node)
7736
7737       for old, new in zip(old_lvs, new_lvs):
7738         new.logical_id = old.logical_id
7739         self.cfg.SetDiskID(new, self.target_node)
7740
7741       for disk in old_lvs:
7742         disk.logical_id = ren_fn(disk, temp_suffix)
7743         self.cfg.SetDiskID(disk, self.target_node)
7744
7745       # Now that the new lvs have the old name, we can add them to the device
7746       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
7747       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
7748                                                   new_lvs)
7749       msg = result.fail_msg
7750       if msg:
7751         for new_lv in new_lvs:
7752           msg2 = self.rpc.call_blockdev_remove(self.target_node,
7753                                                new_lv).fail_msg
7754           if msg2:
7755             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
7756                                hint=("cleanup manually the unused logical"
7757                                      "volumes"))
7758         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
7759
7760       dev.children = new_lvs
7761
7762       self.cfg.Update(self.instance, feedback_fn)
7763
7764     cstep = 5
7765     if self.early_release:
7766       self.lu.LogStep(cstep, steps_total, "Removing old storage")
7767       cstep += 1
7768       self._RemoveOldStorage(self.target_node, iv_names)
7769       # WARNING: we release both node locks here, do not do other RPCs
7770       # than WaitForSync to the primary node
7771       self._ReleaseNodeLock([self.target_node, self.other_node])
7772
7773     # Wait for sync
7774     # This can fail as the old devices are degraded and _WaitForSync
7775     # does a combined result over all disks, so we don't check its return value
7776     self.lu.LogStep(cstep, steps_total, "Sync devices")
7777     cstep += 1
7778     _WaitForSync(self.lu, self.instance)
7779
7780     # Check all devices manually
7781     self._CheckDevices(self.instance.primary_node, iv_names)
7782
7783     # Step: remove old storage
7784     if not self.early_release:
7785       self.lu.LogStep(cstep, steps_total, "Removing old storage")
7786       cstep += 1
7787       self._RemoveOldStorage(self.target_node, iv_names)
7788
7789   def _ExecDrbd8Secondary(self, feedback_fn):
7790     """Replace the secondary node for DRBD 8.
7791
7792     The algorithm for replace is quite complicated:
7793       - for all disks of the instance:
7794         - create new LVs on the new node with same names
7795         - shutdown the drbd device on the old secondary
7796         - disconnect the drbd network on the primary
7797         - create the drbd device on the new secondary
7798         - network attach the drbd on the primary, using an artifice:
7799           the drbd code for Attach() will connect to the network if it
7800           finds a device which is connected to the good local disks but
7801           not network enabled
7802       - wait for sync across all devices
7803       - remove all disks from the old secondary
7804
7805     Failures are not very well handled.
7806
7807     """
7808     steps_total = 6
7809
7810     # Step: check device activation
7811     self.lu.LogStep(1, steps_total, "Check device existence")
7812     self._CheckDisksExistence([self.instance.primary_node])
7813     self._CheckVolumeGroup([self.instance.primary_node])
7814
7815     # Step: check other node consistency
7816     self.lu.LogStep(2, steps_total, "Check peer consistency")
7817     self._CheckDisksConsistency(self.instance.primary_node, True, True)
7818
7819     # Step: create new storage
7820     self.lu.LogStep(3, steps_total, "Allocate new storage")
7821     for idx, dev in enumerate(self.instance.disks):
7822       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
7823                       (self.new_node, idx))
7824       # we pass force_create=True to force LVM creation
7825       for new_lv in dev.children:
7826         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
7827                         _GetInstanceInfoText(self.instance), False)
7828
7829     # Step 4: dbrd minors and drbd setups changes
7830     # after this, we must manually remove the drbd minors on both the
7831     # error and the success paths
7832     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
7833     minors = self.cfg.AllocateDRBDMinor([self.new_node
7834                                          for dev in self.instance.disks],
7835                                         self.instance.name)
7836     logging.debug("Allocated minors %r", minors)
7837
7838     iv_names = {}
7839     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
7840       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
7841                       (self.new_node, idx))
7842       # create new devices on new_node; note that we create two IDs:
7843       # one without port, so the drbd will be activated without
7844       # networking information on the new node at this stage, and one
7845       # with network, for the latter activation in step 4
7846       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
7847       if self.instance.primary_node == o_node1:
7848         p_minor = o_minor1
7849       else:
7850         assert self.instance.primary_node == o_node2, "Three-node instance?"
7851         p_minor = o_minor2
7852
7853       new_alone_id = (self.instance.primary_node, self.new_node, None,
7854                       p_minor, new_minor, o_secret)
7855       new_net_id = (self.instance.primary_node, self.new_node, o_port,
7856                     p_minor, new_minor, o_secret)
7857
7858       iv_names[idx] = (dev, dev.children, new_net_id)
7859       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
7860                     new_net_id)
7861       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
7862                               logical_id=new_alone_id,
7863                               children=dev.children,
7864                               size=dev.size)
7865       try:
7866         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
7867                               _GetInstanceInfoText(self.instance), False)
7868       except errors.GenericError:
7869         self.cfg.ReleaseDRBDMinors(self.instance.name)
7870         raise
7871
7872     # We have new devices, shutdown the drbd on the old secondary
7873     for idx, dev in enumerate(self.instance.disks):
7874       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
7875       self.cfg.SetDiskID(dev, self.target_node)
7876       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
7877       if msg:
7878         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
7879                            "node: %s" % (idx, msg),
7880                            hint=("Please cleanup this device manually as"
7881                                  " soon as possible"))
7882
7883     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
7884     result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
7885                                                self.node_secondary_ip,
7886                                                self.instance.disks)\
7887                                               [self.instance.primary_node]
7888
7889     msg = result.fail_msg
7890     if msg:
7891       # detaches didn't succeed (unlikely)
7892       self.cfg.ReleaseDRBDMinors(self.instance.name)
7893       raise errors.OpExecError("Can't detach the disks from the network on"
7894                                " old node: %s" % (msg,))
7895
7896     # if we managed to detach at least one, we update all the disks of
7897     # the instance to point to the new secondary
7898     self.lu.LogInfo("Updating instance configuration")
7899     for dev, _, new_logical_id in iv_names.itervalues():
7900       dev.logical_id = new_logical_id
7901       self.cfg.SetDiskID(dev, self.instance.primary_node)
7902
7903     self.cfg.Update(self.instance, feedback_fn)
7904
7905     # and now perform the drbd attach
7906     self.lu.LogInfo("Attaching primary drbds to new secondary"
7907                     " (standalone => connected)")
7908     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
7909                                             self.new_node],
7910                                            self.node_secondary_ip,
7911                                            self.instance.disks,
7912                                            self.instance.name,
7913                                            False)
7914     for to_node, to_result in result.items():
7915       msg = to_result.fail_msg
7916       if msg:
7917         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
7918                            to_node, msg,
7919                            hint=("please do a gnt-instance info to see the"
7920                                  " status of disks"))
7921     cstep = 5
7922     if self.early_release:
7923       self.lu.LogStep(cstep, steps_total, "Removing old storage")
7924       cstep += 1
7925       self._RemoveOldStorage(self.target_node, iv_names)
7926       # WARNING: we release all node locks here, do not do other RPCs
7927       # than WaitForSync to the primary node
7928       self._ReleaseNodeLock([self.instance.primary_node,
7929                              self.target_node,
7930                              self.new_node])
7931
7932     # Wait for sync
7933     # This can fail as the old devices are degraded and _WaitForSync
7934     # does a combined result over all disks, so we don't check its return value
7935     self.lu.LogStep(cstep, steps_total, "Sync devices")
7936     cstep += 1
7937     _WaitForSync(self.lu, self.instance)
7938
7939     # Check all devices manually
7940     self._CheckDevices(self.instance.primary_node, iv_names)
7941
7942     # Step: remove old storage
7943     if not self.early_release:
7944       self.lu.LogStep(cstep, steps_total, "Removing old storage")
7945       self._RemoveOldStorage(self.target_node, iv_names)
7946
7947
7948 class LURepairNodeStorage(NoHooksLU):
7949   """Repairs the volume group on a node.
7950
7951   """
7952   _OP_REQP = ["node_name"]
7953   REQ_BGL = False
7954
7955   def CheckArguments(self):
7956     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
7957
7958     _CheckStorageType(self.op.storage_type)
7959
7960   def ExpandNames(self):
7961     self.needed_locks = {
7962       locking.LEVEL_NODE: [self.op.node_name],
7963       }
7964
7965   def _CheckFaultyDisks(self, instance, node_name):
7966     """Ensure faulty disks abort the opcode or at least warn."""
7967     try:
7968       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
7969                                   node_name, True):
7970         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
7971                                    " node '%s'" % (instance.name, node_name),
7972                                    errors.ECODE_STATE)
7973     except errors.OpPrereqError, err:
7974       if self.op.ignore_consistency:
7975         self.proc.LogWarning(str(err.args[0]))
7976       else:
7977         raise
7978
7979   def CheckPrereq(self):
7980     """Check prerequisites.
7981
7982     """
7983     storage_type = self.op.storage_type
7984
7985     if (constants.SO_FIX_CONSISTENCY not in
7986         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
7987       raise errors.OpPrereqError("Storage units of type '%s' can not be"
7988                                  " repaired" % storage_type,
7989                                  errors.ECODE_INVAL)
7990
7991     # Check whether any instance on this node has faulty disks
7992     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
7993       if not inst.admin_up:
7994         continue
7995       check_nodes = set(inst.all_nodes)
7996       check_nodes.discard(self.op.node_name)
7997       for inst_node_name in check_nodes:
7998         self._CheckFaultyDisks(inst, inst_node_name)
7999
8000   def Exec(self, feedback_fn):
8001     feedback_fn("Repairing storage unit '%s' on %s ..." %
8002                 (self.op.name, self.op.node_name))
8003
8004     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
8005     result = self.rpc.call_storage_execute(self.op.node_name,
8006                                            self.op.storage_type, st_args,
8007                                            self.op.name,
8008                                            constants.SO_FIX_CONSISTENCY)
8009     result.Raise("Failed to repair storage unit '%s' on %s" %
8010                  (self.op.name, self.op.node_name))
8011
8012
8013 class LUNodeEvacuationStrategy(NoHooksLU):
8014   """Computes the node evacuation strategy.
8015
8016   """
8017   _OP_REQP = ["nodes"]
8018   REQ_BGL = False
8019
8020   def CheckArguments(self):
8021     if not hasattr(self.op, "remote_node"):
8022       self.op.remote_node = None
8023     if not hasattr(self.op, "iallocator"):
8024       self.op.iallocator = None
8025     if self.op.remote_node is not None and self.op.iallocator is not None:
8026       raise errors.OpPrereqError("Give either the iallocator or the new"
8027                                  " secondary, not both", errors.ECODE_INVAL)
8028
8029   def ExpandNames(self):
8030     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
8031     self.needed_locks = locks = {}
8032     if self.op.remote_node is None:
8033       locks[locking.LEVEL_NODE] = locking.ALL_SET
8034     else:
8035       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8036       locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
8037
8038   def CheckPrereq(self):
8039     pass
8040
8041   def Exec(self, feedback_fn):
8042     if self.op.remote_node is not None:
8043       instances = []
8044       for node in self.op.nodes:
8045         instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
8046       result = []
8047       for i in instances:
8048         if i.primary_node == self.op.remote_node:
8049           raise errors.OpPrereqError("Node %s is the primary node of"
8050                                      " instance %s, cannot use it as"
8051                                      " secondary" %
8052                                      (self.op.remote_node, i.name),
8053                                      errors.ECODE_INVAL)
8054         result.append([i.name, self.op.remote_node])
8055     else:
8056       ial = IAllocator(self.cfg, self.rpc,
8057                        mode=constants.IALLOCATOR_MODE_MEVAC,
8058                        evac_nodes=self.op.nodes)
8059       ial.Run(self.op.iallocator, validate=True)
8060       if not ial.success:
8061         raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
8062                                  errors.ECODE_NORES)
8063       result = ial.result
8064     return result
8065
8066
8067 class LUGrowDisk(LogicalUnit):
8068   """Grow a disk of an instance.
8069
8070   """
8071   HPATH = "disk-grow"
8072   HTYPE = constants.HTYPE_INSTANCE
8073   _OP_REQP = ["instance_name", "disk", "amount", "wait_for_sync"]
8074   REQ_BGL = False
8075
8076   def ExpandNames(self):
8077     self._ExpandAndLockInstance()
8078     self.needed_locks[locking.LEVEL_NODE] = []
8079     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8080
8081   def DeclareLocks(self, level):
8082     if level == locking.LEVEL_NODE:
8083       self._LockInstancesNodes()
8084
8085   def BuildHooksEnv(self):
8086     """Build hooks env.
8087
8088     This runs on the master, the primary and all the secondaries.
8089
8090     """
8091     env = {
8092       "DISK": self.op.disk,
8093       "AMOUNT": self.op.amount,
8094       }
8095     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
8096     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8097     return env, nl, nl
8098
8099   def CheckPrereq(self):
8100     """Check prerequisites.
8101
8102     This checks that the instance is in the cluster.
8103
8104     """
8105     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8106     assert instance is not None, \
8107       "Cannot retrieve locked instance %s" % self.op.instance_name
8108     nodenames = list(instance.all_nodes)
8109     for node in nodenames:
8110       _CheckNodeOnline(self, node)
8111
8112
8113     self.instance = instance
8114
8115     if instance.disk_template not in constants.DTS_GROWABLE:
8116       raise errors.OpPrereqError("Instance's disk layout does not support"
8117                                  " growing.", errors.ECODE_INVAL)
8118
8119     self.disk = instance.FindDisk(self.op.disk)
8120
8121     if instance.disk_template != constants.DT_FILE:
8122       # TODO: check the free disk space for file, when that feature will be
8123       # supported
8124       _CheckNodesFreeDisk(self, nodenames, self.op.amount)
8125
8126   def Exec(self, feedback_fn):
8127     """Execute disk grow.
8128
8129     """
8130     instance = self.instance
8131     disk = self.disk
8132
8133     disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
8134     if not disks_ok:
8135       raise errors.OpExecError("Cannot activate block device to grow")
8136
8137     for node in instance.all_nodes:
8138       self.cfg.SetDiskID(disk, node)
8139       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
8140       result.Raise("Grow request failed to node %s" % node)
8141
8142       # TODO: Rewrite code to work properly
8143       # DRBD goes into sync mode for a short amount of time after executing the
8144       # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
8145       # calling "resize" in sync mode fails. Sleeping for a short amount of
8146       # time is a work-around.
8147       time.sleep(5)
8148
8149     disk.RecordGrow(self.op.amount)
8150     self.cfg.Update(instance, feedback_fn)
8151     if self.op.wait_for_sync:
8152       disk_abort = not _WaitForSync(self, instance, disks=[disk])
8153       if disk_abort:
8154         self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
8155                              " status.\nPlease check the instance.")
8156       if not instance.admin_up:
8157         _SafeShutdownInstanceDisks(self, instance, disks=[disk])
8158     elif not instance.admin_up:
8159       self.proc.LogWarning("Not shutting down the disk even if the instance is"
8160                            " not supposed to be running because no wait for"
8161                            " sync mode was requested.")
8162
8163
8164 class LUQueryInstanceData(NoHooksLU):
8165   """Query runtime instance data.
8166
8167   """
8168   _OP_REQP = ["instances", "static"]
8169   REQ_BGL = False
8170
8171   def ExpandNames(self):
8172     self.needed_locks = {}
8173     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
8174
8175     if not isinstance(self.op.instances, list):
8176       raise errors.OpPrereqError("Invalid argument type 'instances'",
8177                                  errors.ECODE_INVAL)
8178
8179     if self.op.instances:
8180       self.wanted_names = []
8181       for name in self.op.instances:
8182         full_name = _ExpandInstanceName(self.cfg, name)
8183         self.wanted_names.append(full_name)
8184       self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
8185     else:
8186       self.wanted_names = None
8187       self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
8188
8189     self.needed_locks[locking.LEVEL_NODE] = []
8190     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8191
8192   def DeclareLocks(self, level):
8193     if level == locking.LEVEL_NODE:
8194       self._LockInstancesNodes()
8195
8196   def CheckPrereq(self):
8197     """Check prerequisites.
8198
8199     This only checks the optional instance list against the existing names.
8200
8201     """
8202     if self.wanted_names is None:
8203       self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
8204
8205     self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
8206                              in self.wanted_names]
8207     return
8208
8209   def _ComputeBlockdevStatus(self, node, instance_name, dev):
8210     """Returns the status of a block device
8211
8212     """
8213     if self.op.static or not node:
8214       return None
8215
8216     self.cfg.SetDiskID(dev, node)
8217
8218     result = self.rpc.call_blockdev_find(node, dev)
8219     if result.offline:
8220       return None
8221
8222     result.Raise("Can't compute disk status for %s" % instance_name)
8223
8224     status = result.payload
8225     if status is None:
8226       return None
8227
8228     return (status.dev_path, status.major, status.minor,
8229             status.sync_percent, status.estimated_time,
8230             status.is_degraded, status.ldisk_status)
8231
8232   def _ComputeDiskStatus(self, instance, snode, dev):
8233     """Compute block device status.
8234
8235     """
8236     if dev.dev_type in constants.LDS_DRBD:
8237       # we change the snode then (otherwise we use the one passed in)
8238       if dev.logical_id[0] == instance.primary_node:
8239         snode = dev.logical_id[1]
8240       else:
8241         snode = dev.logical_id[0]
8242
8243     dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
8244                                               instance.name, dev)
8245     dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
8246
8247     if dev.children:
8248       dev_children = [self._ComputeDiskStatus(instance, snode, child)
8249                       for child in dev.children]
8250     else:
8251       dev_children = []
8252
8253     data = {
8254       "iv_name": dev.iv_name,
8255       "dev_type": dev.dev_type,
8256       "logical_id": dev.logical_id,
8257       "physical_id": dev.physical_id,
8258       "pstatus": dev_pstatus,
8259       "sstatus": dev_sstatus,
8260       "children": dev_children,
8261       "mode": dev.mode,
8262       "size": dev.size,
8263       }
8264
8265     return data
8266
8267   def Exec(self, feedback_fn):
8268     """Gather and return data"""
8269     result = {}
8270
8271     cluster = self.cfg.GetClusterInfo()
8272
8273     for instance in self.wanted_instances:
8274       if not self.op.static:
8275         remote_info = self.rpc.call_instance_info(instance.primary_node,
8276                                                   instance.name,
8277                                                   instance.hypervisor)
8278         remote_info.Raise("Error checking node %s" % instance.primary_node)
8279         remote_info = remote_info.payload
8280         if remote_info and "state" in remote_info:
8281           remote_state = "up"
8282         else:
8283           remote_state = "down"
8284       else:
8285         remote_state = None
8286       if instance.admin_up:
8287         config_state = "up"
8288       else:
8289         config_state = "down"
8290
8291       disks = [self._ComputeDiskStatus(instance, None, device)
8292                for device in instance.disks]
8293
8294       idict = {
8295         "name": instance.name,
8296         "config_state": config_state,
8297         "run_state": remote_state,
8298         "pnode": instance.primary_node,
8299         "snodes": instance.secondary_nodes,
8300         "os": instance.os,
8301         # this happens to be the same format used for hooks
8302         "nics": _NICListToTuple(self, instance.nics),
8303         "disk_template": instance.disk_template,
8304         "disks": disks,
8305         "hypervisor": instance.hypervisor,
8306         "network_port": instance.network_port,
8307         "hv_instance": instance.hvparams,
8308         "hv_actual": cluster.FillHV(instance, skip_globals=True),
8309         "be_instance": instance.beparams,
8310         "be_actual": cluster.FillBE(instance),
8311         "os_instance": instance.osparams,
8312         "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
8313         "serial_no": instance.serial_no,
8314         "mtime": instance.mtime,
8315         "ctime": instance.ctime,
8316         "uuid": instance.uuid,
8317         }
8318
8319       result[instance.name] = idict
8320
8321     return result
8322
8323
8324 class LUSetInstanceParams(LogicalUnit):
8325   """Modifies an instances's parameters.
8326
8327   """
8328   HPATH = "instance-modify"
8329   HTYPE = constants.HTYPE_INSTANCE
8330   _OP_REQP = ["instance_name"]
8331   REQ_BGL = False
8332
8333   def CheckArguments(self):
8334     if not hasattr(self.op, 'nics'):
8335       self.op.nics = []
8336     if not hasattr(self.op, 'disks'):
8337       self.op.disks = []
8338     if not hasattr(self.op, 'beparams'):
8339       self.op.beparams = {}
8340     if not hasattr(self.op, 'hvparams'):
8341       self.op.hvparams = {}
8342     if not hasattr(self.op, "disk_template"):
8343       self.op.disk_template = None
8344     if not hasattr(self.op, "remote_node"):
8345       self.op.remote_node = None
8346     if not hasattr(self.op, "os_name"):
8347       self.op.os_name = None
8348     if not hasattr(self.op, "force_variant"):
8349       self.op.force_variant = False
8350     if not hasattr(self.op, "osparams"):
8351       self.op.osparams = None
8352     self.op.force = getattr(self.op, "force", False)
8353     if not (self.op.nics or self.op.disks or self.op.disk_template or
8354             self.op.hvparams or self.op.beparams or self.op.os_name):
8355       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
8356
8357     if self.op.hvparams:
8358       _CheckGlobalHvParams(self.op.hvparams)
8359
8360     # Disk validation
8361     disk_addremove = 0
8362     for disk_op, disk_dict in self.op.disks:
8363       utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
8364       if disk_op == constants.DDM_REMOVE:
8365         disk_addremove += 1
8366         continue
8367       elif disk_op == constants.DDM_ADD:
8368         disk_addremove += 1
8369       else:
8370         if not isinstance(disk_op, int):
8371           raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
8372         if not isinstance(disk_dict, dict):
8373           msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
8374           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8375
8376       if disk_op == constants.DDM_ADD:
8377         mode = disk_dict.setdefault('mode', constants.DISK_RDWR)
8378         if mode not in constants.DISK_ACCESS_SET:
8379           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
8380                                      errors.ECODE_INVAL)
8381         size = disk_dict.get('size', None)
8382         if size is None:
8383           raise errors.OpPrereqError("Required disk parameter size missing",
8384                                      errors.ECODE_INVAL)
8385         try:
8386           size = int(size)
8387         except (TypeError, ValueError), err:
8388           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
8389                                      str(err), errors.ECODE_INVAL)
8390         disk_dict['size'] = size
8391       else:
8392         # modification of disk
8393         if 'size' in disk_dict:
8394           raise errors.OpPrereqError("Disk size change not possible, use"
8395                                      " grow-disk", errors.ECODE_INVAL)
8396
8397     if disk_addremove > 1:
8398       raise errors.OpPrereqError("Only one disk add or remove operation"
8399                                  " supported at a time", errors.ECODE_INVAL)
8400
8401     if self.op.disks and self.op.disk_template is not None:
8402       raise errors.OpPrereqError("Disk template conversion and other disk"
8403                                  " changes not supported at the same time",
8404                                  errors.ECODE_INVAL)
8405
8406     if self.op.disk_template:
8407       _CheckDiskTemplate(self.op.disk_template)
8408       if (self.op.disk_template in constants.DTS_NET_MIRROR and
8409           self.op.remote_node is None):
8410         raise errors.OpPrereqError("Changing the disk template to a mirrored"
8411                                    " one requires specifying a secondary node",
8412                                    errors.ECODE_INVAL)
8413
8414     # NIC validation
8415     nic_addremove = 0
8416     for nic_op, nic_dict in self.op.nics:
8417       utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
8418       if nic_op == constants.DDM_REMOVE:
8419         nic_addremove += 1
8420         continue
8421       elif nic_op == constants.DDM_ADD:
8422         nic_addremove += 1
8423       else:
8424         if not isinstance(nic_op, int):
8425           raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
8426         if not isinstance(nic_dict, dict):
8427           msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
8428           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
8429
8430       # nic_dict should be a dict
8431       nic_ip = nic_dict.get('ip', None)
8432       if nic_ip is not None:
8433         if nic_ip.lower() == constants.VALUE_NONE:
8434           nic_dict['ip'] = None
8435         else:
8436           if not utils.IsValidIP(nic_ip):
8437             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
8438                                        errors.ECODE_INVAL)
8439
8440       nic_bridge = nic_dict.get('bridge', None)
8441       nic_link = nic_dict.get('link', None)
8442       if nic_bridge and nic_link:
8443         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
8444                                    " at the same time", errors.ECODE_INVAL)
8445       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
8446         nic_dict['bridge'] = None
8447       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
8448         nic_dict['link'] = None
8449
8450       if nic_op == constants.DDM_ADD:
8451         nic_mac = nic_dict.get('mac', None)
8452         if nic_mac is None:
8453           nic_dict['mac'] = constants.VALUE_AUTO
8454
8455       if 'mac' in nic_dict:
8456         nic_mac = nic_dict['mac']
8457         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8458           nic_mac = utils.NormalizeAndValidateMac(nic_mac)
8459
8460         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
8461           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
8462                                      " modifying an existing nic",
8463                                      errors.ECODE_INVAL)
8464
8465     if nic_addremove > 1:
8466       raise errors.OpPrereqError("Only one NIC add or remove operation"
8467                                  " supported at a time", errors.ECODE_INVAL)
8468
8469   def ExpandNames(self):
8470     self._ExpandAndLockInstance()
8471     self.needed_locks[locking.LEVEL_NODE] = []
8472     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8473
8474   def DeclareLocks(self, level):
8475     if level == locking.LEVEL_NODE:
8476       self._LockInstancesNodes()
8477       if self.op.disk_template and self.op.remote_node:
8478         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8479         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
8480
8481   def BuildHooksEnv(self):
8482     """Build hooks env.
8483
8484     This runs on the master, primary and secondaries.
8485
8486     """
8487     args = dict()
8488     if constants.BE_MEMORY in self.be_new:
8489       args['memory'] = self.be_new[constants.BE_MEMORY]
8490     if constants.BE_VCPUS in self.be_new:
8491       args['vcpus'] = self.be_new[constants.BE_VCPUS]
8492     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
8493     # information at all.
8494     if self.op.nics:
8495       args['nics'] = []
8496       nic_override = dict(self.op.nics)
8497       for idx, nic in enumerate(self.instance.nics):
8498         if idx in nic_override:
8499           this_nic_override = nic_override[idx]
8500         else:
8501           this_nic_override = {}
8502         if 'ip' in this_nic_override:
8503           ip = this_nic_override['ip']
8504         else:
8505           ip = nic.ip
8506         if 'mac' in this_nic_override:
8507           mac = this_nic_override['mac']
8508         else:
8509           mac = nic.mac
8510         if idx in self.nic_pnew:
8511           nicparams = self.nic_pnew[idx]
8512         else:
8513           nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
8514         mode = nicparams[constants.NIC_MODE]
8515         link = nicparams[constants.NIC_LINK]
8516         args['nics'].append((ip, mac, mode, link))
8517       if constants.DDM_ADD in nic_override:
8518         ip = nic_override[constants.DDM_ADD].get('ip', None)
8519         mac = nic_override[constants.DDM_ADD]['mac']
8520         nicparams = self.nic_pnew[constants.DDM_ADD]
8521         mode = nicparams[constants.NIC_MODE]
8522         link = nicparams[constants.NIC_LINK]
8523         args['nics'].append((ip, mac, mode, link))
8524       elif constants.DDM_REMOVE in nic_override:
8525         del args['nics'][-1]
8526
8527     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
8528     if self.op.disk_template:
8529       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
8530     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
8531     return env, nl, nl
8532
8533   def CheckPrereq(self):
8534     """Check prerequisites.
8535
8536     This only checks the instance list against the existing names.
8537
8538     """
8539     self.force = self.op.force
8540
8541     # checking the new params on the primary/secondary nodes
8542
8543     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8544     cluster = self.cluster = self.cfg.GetClusterInfo()
8545     assert self.instance is not None, \
8546       "Cannot retrieve locked instance %s" % self.op.instance_name
8547     pnode = instance.primary_node
8548     nodelist = list(instance.all_nodes)
8549
8550     # OS change
8551     if self.op.os_name and not self.op.force:
8552       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
8553                       self.op.force_variant)
8554       instance_os = self.op.os_name
8555     else:
8556       instance_os = instance.os
8557
8558     if self.op.disk_template:
8559       if instance.disk_template == self.op.disk_template:
8560         raise errors.OpPrereqError("Instance already has disk template %s" %
8561                                    instance.disk_template, errors.ECODE_INVAL)
8562
8563       if (instance.disk_template,
8564           self.op.disk_template) not in self._DISK_CONVERSIONS:
8565         raise errors.OpPrereqError("Unsupported disk template conversion from"
8566                                    " %s to %s" % (instance.disk_template,
8567                                                   self.op.disk_template),
8568                                    errors.ECODE_INVAL)
8569       if self.op.disk_template in constants.DTS_NET_MIRROR:
8570         _CheckNodeOnline(self, self.op.remote_node)
8571         _CheckNodeNotDrained(self, self.op.remote_node)
8572         disks = [{"size": d.size} for d in instance.disks]
8573         required = _ComputeDiskSize(self.op.disk_template, disks)
8574         _CheckNodesFreeDisk(self, [self.op.remote_node], required)
8575         _CheckInstanceDown(self, instance, "cannot change disk template")
8576
8577     # hvparams processing
8578     if self.op.hvparams:
8579       hv_type = instance.hypervisor
8580       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
8581       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
8582       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
8583
8584       # local check
8585       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
8586       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
8587       self.hv_new = hv_new # the new actual values
8588       self.hv_inst = i_hvdict # the new dict (without defaults)
8589     else:
8590       self.hv_new = self.hv_inst = {}
8591
8592     # beparams processing
8593     if self.op.beparams:
8594       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
8595                                    use_none=True)
8596       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
8597       be_new = cluster.SimpleFillBE(i_bedict)
8598       self.be_new = be_new # the new actual values
8599       self.be_inst = i_bedict # the new dict (without defaults)
8600     else:
8601       self.be_new = self.be_inst = {}
8602
8603     # osparams processing
8604     if self.op.osparams:
8605       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
8606       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
8607       self.os_new = cluster.SimpleFillOS(instance_os, i_osdict)
8608       self.os_inst = i_osdict # the new dict (without defaults)
8609     else:
8610       self.os_new = self.os_inst = {}
8611
8612     self.warn = []
8613
8614     if constants.BE_MEMORY in self.op.beparams and not self.force:
8615       mem_check_list = [pnode]
8616       if be_new[constants.BE_AUTO_BALANCE]:
8617         # either we changed auto_balance to yes or it was from before
8618         mem_check_list.extend(instance.secondary_nodes)
8619       instance_info = self.rpc.call_instance_info(pnode, instance.name,
8620                                                   instance.hypervisor)
8621       nodeinfo = self.rpc.call_node_info(mem_check_list, self.cfg.GetVGName(),
8622                                          instance.hypervisor)
8623       pninfo = nodeinfo[pnode]
8624       msg = pninfo.fail_msg
8625       if msg:
8626         # Assume the primary node is unreachable and go ahead
8627         self.warn.append("Can't get info from primary node %s: %s" %
8628                          (pnode,  msg))
8629       elif not isinstance(pninfo.payload.get('memory_free', None), int):
8630         self.warn.append("Node data from primary node %s doesn't contain"
8631                          " free memory information" % pnode)
8632       elif instance_info.fail_msg:
8633         self.warn.append("Can't get instance runtime information: %s" %
8634                         instance_info.fail_msg)
8635       else:
8636         if instance_info.payload:
8637           current_mem = int(instance_info.payload['memory'])
8638         else:
8639           # Assume instance not running
8640           # (there is a slight race condition here, but it's not very probable,
8641           # and we have no other way to check)
8642           current_mem = 0
8643         miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
8644                     pninfo.payload['memory_free'])
8645         if miss_mem > 0:
8646           raise errors.OpPrereqError("This change will prevent the instance"
8647                                      " from starting, due to %d MB of memory"
8648                                      " missing on its primary node" % miss_mem,
8649                                      errors.ECODE_NORES)
8650
8651       if be_new[constants.BE_AUTO_BALANCE]:
8652         for node, nres in nodeinfo.items():
8653           if node not in instance.secondary_nodes:
8654             continue
8655           msg = nres.fail_msg
8656           if msg:
8657             self.warn.append("Can't get info from secondary node %s: %s" %
8658                              (node, msg))
8659           elif not isinstance(nres.payload.get('memory_free', None), int):
8660             self.warn.append("Secondary node %s didn't return free"
8661                              " memory information" % node)
8662           elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
8663             self.warn.append("Not enough memory to failover instance to"
8664                              " secondary node %s" % node)
8665
8666     # NIC processing
8667     self.nic_pnew = {}
8668     self.nic_pinst = {}
8669     for nic_op, nic_dict in self.op.nics:
8670       if nic_op == constants.DDM_REMOVE:
8671         if not instance.nics:
8672           raise errors.OpPrereqError("Instance has no NICs, cannot remove",
8673                                      errors.ECODE_INVAL)
8674         continue
8675       if nic_op != constants.DDM_ADD:
8676         # an existing nic
8677         if not instance.nics:
8678           raise errors.OpPrereqError("Invalid NIC index %s, instance has"
8679                                      " no NICs" % nic_op,
8680                                      errors.ECODE_INVAL)
8681         if nic_op < 0 or nic_op >= len(instance.nics):
8682           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
8683                                      " are 0 to %d" %
8684                                      (nic_op, len(instance.nics) - 1),
8685                                      errors.ECODE_INVAL)
8686         old_nic_params = instance.nics[nic_op].nicparams
8687         old_nic_ip = instance.nics[nic_op].ip
8688       else:
8689         old_nic_params = {}
8690         old_nic_ip = None
8691
8692       update_params_dict = dict([(key, nic_dict[key])
8693                                  for key in constants.NICS_PARAMETERS
8694                                  if key in nic_dict])
8695
8696       if 'bridge' in nic_dict:
8697         update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
8698
8699       new_nic_params = _GetUpdatedParams(old_nic_params,
8700                                          update_params_dict)
8701       utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
8702       new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
8703       objects.NIC.CheckParameterSyntax(new_filled_nic_params)
8704       self.nic_pinst[nic_op] = new_nic_params
8705       self.nic_pnew[nic_op] = new_filled_nic_params
8706       new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
8707
8708       if new_nic_mode == constants.NIC_MODE_BRIDGED:
8709         nic_bridge = new_filled_nic_params[constants.NIC_LINK]
8710         msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
8711         if msg:
8712           msg = "Error checking bridges on node %s: %s" % (pnode, msg)
8713           if self.force:
8714             self.warn.append(msg)
8715           else:
8716             raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
8717       if new_nic_mode == constants.NIC_MODE_ROUTED:
8718         if 'ip' in nic_dict:
8719           nic_ip = nic_dict['ip']
8720         else:
8721           nic_ip = old_nic_ip
8722         if nic_ip is None:
8723           raise errors.OpPrereqError('Cannot set the nic ip to None'
8724                                      ' on a routed nic', errors.ECODE_INVAL)
8725       if 'mac' in nic_dict:
8726         nic_mac = nic_dict['mac']
8727         if nic_mac is None:
8728           raise errors.OpPrereqError('Cannot set the nic mac to None',
8729                                      errors.ECODE_INVAL)
8730         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8731           # otherwise generate the mac
8732           nic_dict['mac'] = self.cfg.GenerateMAC(self.proc.GetECId())
8733         else:
8734           # or validate/reserve the current one
8735           try:
8736             self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
8737           except errors.ReservationError:
8738             raise errors.OpPrereqError("MAC address %s already in use"
8739                                        " in cluster" % nic_mac,
8740                                        errors.ECODE_NOTUNIQUE)
8741
8742     # DISK processing
8743     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
8744       raise errors.OpPrereqError("Disk operations not supported for"
8745                                  " diskless instances",
8746                                  errors.ECODE_INVAL)
8747     for disk_op, _ in self.op.disks:
8748       if disk_op == constants.DDM_REMOVE:
8749         if len(instance.disks) == 1:
8750           raise errors.OpPrereqError("Cannot remove the last disk of"
8751                                      " an instance", errors.ECODE_INVAL)
8752         _CheckInstanceDown(self, instance, "cannot remove disks")
8753
8754       if (disk_op == constants.DDM_ADD and
8755           len(instance.nics) >= constants.MAX_DISKS):
8756         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
8757                                    " add more" % constants.MAX_DISKS,
8758                                    errors.ECODE_STATE)
8759       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
8760         # an existing disk
8761         if disk_op < 0 or disk_op >= len(instance.disks):
8762           raise errors.OpPrereqError("Invalid disk index %s, valid values"
8763                                      " are 0 to %d" %
8764                                      (disk_op, len(instance.disks)),
8765                                      errors.ECODE_INVAL)
8766
8767     return
8768
8769   def _ConvertPlainToDrbd(self, feedback_fn):
8770     """Converts an instance from plain to drbd.
8771
8772     """
8773     feedback_fn("Converting template to drbd")
8774     instance = self.instance
8775     pnode = instance.primary_node
8776     snode = self.op.remote_node
8777
8778     # create a fake disk info for _GenerateDiskTemplate
8779     disk_info = [{"size": d.size, "mode": d.mode} for d in instance.disks]
8780     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
8781                                       instance.name, pnode, [snode],
8782                                       disk_info, None, None, 0)
8783     info = _GetInstanceInfoText(instance)
8784     feedback_fn("Creating aditional volumes...")
8785     # first, create the missing data and meta devices
8786     for disk in new_disks:
8787       # unfortunately this is... not too nice
8788       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
8789                             info, True)
8790       for child in disk.children:
8791         _CreateSingleBlockDev(self, snode, instance, child, info, True)
8792     # at this stage, all new LVs have been created, we can rename the
8793     # old ones
8794     feedback_fn("Renaming original volumes...")
8795     rename_list = [(o, n.children[0].logical_id)
8796                    for (o, n) in zip(instance.disks, new_disks)]
8797     result = self.rpc.call_blockdev_rename(pnode, rename_list)
8798     result.Raise("Failed to rename original LVs")
8799
8800     feedback_fn("Initializing DRBD devices...")
8801     # all child devices are in place, we can now create the DRBD devices
8802     for disk in new_disks:
8803       for node in [pnode, snode]:
8804         f_create = node == pnode
8805         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
8806
8807     # at this point, the instance has been modified
8808     instance.disk_template = constants.DT_DRBD8
8809     instance.disks = new_disks
8810     self.cfg.Update(instance, feedback_fn)
8811
8812     # disks are created, waiting for sync
8813     disk_abort = not _WaitForSync(self, instance)
8814     if disk_abort:
8815       raise errors.OpExecError("There are some degraded disks for"
8816                                " this instance, please cleanup manually")
8817
8818   def _ConvertDrbdToPlain(self, feedback_fn):
8819     """Converts an instance from drbd to plain.
8820
8821     """
8822     instance = self.instance
8823     assert len(instance.secondary_nodes) == 1
8824     pnode = instance.primary_node
8825     snode = instance.secondary_nodes[0]
8826     feedback_fn("Converting template to plain")
8827
8828     old_disks = instance.disks
8829     new_disks = [d.children[0] for d in old_disks]
8830
8831     # copy over size and mode
8832     for parent, child in zip(old_disks, new_disks):
8833       child.size = parent.size
8834       child.mode = parent.mode
8835
8836     # update instance structure
8837     instance.disks = new_disks
8838     instance.disk_template = constants.DT_PLAIN
8839     self.cfg.Update(instance, feedback_fn)
8840
8841     feedback_fn("Removing volumes on the secondary node...")
8842     for disk in old_disks:
8843       self.cfg.SetDiskID(disk, snode)
8844       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
8845       if msg:
8846         self.LogWarning("Could not remove block device %s on node %s,"
8847                         " continuing anyway: %s", disk.iv_name, snode, msg)
8848
8849     feedback_fn("Removing unneeded volumes on the primary node...")
8850     for idx, disk in enumerate(old_disks):
8851       meta = disk.children[1]
8852       self.cfg.SetDiskID(meta, pnode)
8853       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
8854       if msg:
8855         self.LogWarning("Could not remove metadata for disk %d on node %s,"
8856                         " continuing anyway: %s", idx, pnode, msg)
8857
8858
8859   def Exec(self, feedback_fn):
8860     """Modifies an instance.
8861
8862     All parameters take effect only at the next restart of the instance.
8863
8864     """
8865     # Process here the warnings from CheckPrereq, as we don't have a
8866     # feedback_fn there.
8867     for warn in self.warn:
8868       feedback_fn("WARNING: %s" % warn)
8869
8870     result = []
8871     instance = self.instance
8872     # disk changes
8873     for disk_op, disk_dict in self.op.disks:
8874       if disk_op == constants.DDM_REMOVE:
8875         # remove the last disk
8876         device = instance.disks.pop()
8877         device_idx = len(instance.disks)
8878         for node, disk in device.ComputeNodeTree(instance.primary_node):
8879           self.cfg.SetDiskID(disk, node)
8880           msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
8881           if msg:
8882             self.LogWarning("Could not remove disk/%d on node %s: %s,"
8883                             " continuing anyway", device_idx, node, msg)
8884         result.append(("disk/%d" % device_idx, "remove"))
8885       elif disk_op == constants.DDM_ADD:
8886         # add a new disk
8887         if instance.disk_template == constants.DT_FILE:
8888           file_driver, file_path = instance.disks[0].logical_id
8889           file_path = os.path.dirname(file_path)
8890         else:
8891           file_driver = file_path = None
8892         disk_idx_base = len(instance.disks)
8893         new_disk = _GenerateDiskTemplate(self,
8894                                          instance.disk_template,
8895                                          instance.name, instance.primary_node,
8896                                          instance.secondary_nodes,
8897                                          [disk_dict],
8898                                          file_path,
8899                                          file_driver,
8900                                          disk_idx_base)[0]
8901         instance.disks.append(new_disk)
8902         info = _GetInstanceInfoText(instance)
8903
8904         logging.info("Creating volume %s for instance %s",
8905                      new_disk.iv_name, instance.name)
8906         # Note: this needs to be kept in sync with _CreateDisks
8907         #HARDCODE
8908         for node in instance.all_nodes:
8909           f_create = node == instance.primary_node
8910           try:
8911             _CreateBlockDev(self, node, instance, new_disk,
8912                             f_create, info, f_create)
8913           except errors.OpExecError, err:
8914             self.LogWarning("Failed to create volume %s (%s) on"
8915                             " node %s: %s",
8916                             new_disk.iv_name, new_disk, node, err)
8917         result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
8918                        (new_disk.size, new_disk.mode)))
8919       else:
8920         # change a given disk
8921         instance.disks[disk_op].mode = disk_dict['mode']
8922         result.append(("disk.mode/%d" % disk_op, disk_dict['mode']))
8923
8924     if self.op.disk_template:
8925       r_shut = _ShutdownInstanceDisks(self, instance)
8926       if not r_shut:
8927         raise errors.OpExecError("Cannot shutdow instance disks, unable to"
8928                                  " proceed with disk template conversion")
8929       mode = (instance.disk_template, self.op.disk_template)
8930       try:
8931         self._DISK_CONVERSIONS[mode](self, feedback_fn)
8932       except:
8933         self.cfg.ReleaseDRBDMinors(instance.name)
8934         raise
8935       result.append(("disk_template", self.op.disk_template))
8936
8937     # NIC changes
8938     for nic_op, nic_dict in self.op.nics:
8939       if nic_op == constants.DDM_REMOVE:
8940         # remove the last nic
8941         del instance.nics[-1]
8942         result.append(("nic.%d" % len(instance.nics), "remove"))
8943       elif nic_op == constants.DDM_ADD:
8944         # mac and bridge should be set, by now
8945         mac = nic_dict['mac']
8946         ip = nic_dict.get('ip', None)
8947         nicparams = self.nic_pinst[constants.DDM_ADD]
8948         new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
8949         instance.nics.append(new_nic)
8950         result.append(("nic.%d" % (len(instance.nics) - 1),
8951                        "add:mac=%s,ip=%s,mode=%s,link=%s" %
8952                        (new_nic.mac, new_nic.ip,
8953                         self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
8954                         self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
8955                        )))
8956       else:
8957         for key in 'mac', 'ip':
8958           if key in nic_dict:
8959             setattr(instance.nics[nic_op], key, nic_dict[key])
8960         if nic_op in self.nic_pinst:
8961           instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
8962         for key, val in nic_dict.iteritems():
8963           result.append(("nic.%s/%d" % (key, nic_op), val))
8964
8965     # hvparams changes
8966     if self.op.hvparams:
8967       instance.hvparams = self.hv_inst
8968       for key, val in self.op.hvparams.iteritems():
8969         result.append(("hv/%s" % key, val))
8970
8971     # beparams changes
8972     if self.op.beparams:
8973       instance.beparams = self.be_inst
8974       for key, val in self.op.beparams.iteritems():
8975         result.append(("be/%s" % key, val))
8976
8977     # OS change
8978     if self.op.os_name:
8979       instance.os = self.op.os_name
8980
8981     # osparams changes
8982     if self.op.osparams:
8983       instance.osparams = self.os_inst
8984       for key, val in self.op.osparams.iteritems():
8985         result.append(("os/%s" % key, val))
8986
8987     self.cfg.Update(instance, feedback_fn)
8988
8989     return result
8990
8991   _DISK_CONVERSIONS = {
8992     (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
8993     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
8994     }
8995
8996
8997 class LUQueryExports(NoHooksLU):
8998   """Query the exports list
8999
9000   """
9001   _OP_REQP = ['nodes']
9002   REQ_BGL = False
9003
9004   def ExpandNames(self):
9005     self.needed_locks = {}
9006     self.share_locks[locking.LEVEL_NODE] = 1
9007     if not self.op.nodes:
9008       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9009     else:
9010       self.needed_locks[locking.LEVEL_NODE] = \
9011         _GetWantedNodes(self, self.op.nodes)
9012
9013   def CheckPrereq(self):
9014     """Check prerequisites.
9015
9016     """
9017     self.nodes = self.acquired_locks[locking.LEVEL_NODE]
9018
9019   def Exec(self, feedback_fn):
9020     """Compute the list of all the exported system images.
9021
9022     @rtype: dict
9023     @return: a dictionary with the structure node->(export-list)
9024         where export-list is a list of the instances exported on
9025         that node.
9026
9027     """
9028     rpcresult = self.rpc.call_export_list(self.nodes)
9029     result = {}
9030     for node in rpcresult:
9031       if rpcresult[node].fail_msg:
9032         result[node] = False
9033       else:
9034         result[node] = rpcresult[node].payload
9035
9036     return result
9037
9038
9039 class LUPrepareExport(NoHooksLU):
9040   """Prepares an instance for an export and returns useful information.
9041
9042   """
9043   _OP_REQP = ["instance_name", "mode"]
9044   REQ_BGL = False
9045
9046   def CheckArguments(self):
9047     """Check the arguments.
9048
9049     """
9050     if self.op.mode not in constants.EXPORT_MODES:
9051       raise errors.OpPrereqError("Invalid export mode %r" % self.op.mode,
9052                                  errors.ECODE_INVAL)
9053
9054   def ExpandNames(self):
9055     self._ExpandAndLockInstance()
9056
9057   def CheckPrereq(self):
9058     """Check prerequisites.
9059
9060     """
9061     instance_name = self.op.instance_name
9062
9063     self.instance = self.cfg.GetInstanceInfo(instance_name)
9064     assert self.instance is not None, \
9065           "Cannot retrieve locked instance %s" % self.op.instance_name
9066     _CheckNodeOnline(self, self.instance.primary_node)
9067
9068     self._cds = _GetClusterDomainSecret()
9069
9070   def Exec(self, feedback_fn):
9071     """Prepares an instance for an export.
9072
9073     """
9074     instance = self.instance
9075
9076     if self.op.mode == constants.EXPORT_MODE_REMOTE:
9077       salt = utils.GenerateSecret(8)
9078
9079       feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
9080       result = self.rpc.call_x509_cert_create(instance.primary_node,
9081                                               constants.RIE_CERT_VALIDITY)
9082       result.Raise("Can't create X509 key and certificate on %s" % result.node)
9083
9084       (name, cert_pem) = result.payload
9085
9086       cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
9087                                              cert_pem)
9088
9089       return {
9090         "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
9091         "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
9092                           salt),
9093         "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
9094         }
9095
9096     return None
9097
9098
9099 class LUExportInstance(LogicalUnit):
9100   """Export an instance to an image in the cluster.
9101
9102   """
9103   HPATH = "instance-export"
9104   HTYPE = constants.HTYPE_INSTANCE
9105   _OP_REQP = ["instance_name", "target_node", "shutdown"]
9106   REQ_BGL = False
9107
9108   def CheckArguments(self):
9109     """Check the arguments.
9110
9111     """
9112     _CheckBooleanOpField(self.op, "remove_instance")
9113     _CheckBooleanOpField(self.op, "ignore_remove_failures")
9114
9115     self.shutdown_timeout = getattr(self.op, "shutdown_timeout",
9116                                     constants.DEFAULT_SHUTDOWN_TIMEOUT)
9117     self.remove_instance = getattr(self.op, "remove_instance", False)
9118     self.ignore_remove_failures = getattr(self.op, "ignore_remove_failures",
9119                                           False)
9120     self.export_mode = getattr(self.op, "mode", constants.EXPORT_MODE_LOCAL)
9121     self.x509_key_name = getattr(self.op, "x509_key_name", None)
9122     self.dest_x509_ca_pem = getattr(self.op, "destination_x509_ca", None)
9123
9124     if self.remove_instance and not self.op.shutdown:
9125       raise errors.OpPrereqError("Can not remove instance without shutting it"
9126                                  " down before")
9127
9128     if self.export_mode not in constants.EXPORT_MODES:
9129       raise errors.OpPrereqError("Invalid export mode %r" % self.export_mode,
9130                                  errors.ECODE_INVAL)
9131
9132     if self.export_mode == constants.EXPORT_MODE_REMOTE:
9133       if not self.x509_key_name:
9134         raise errors.OpPrereqError("Missing X509 key name for encryption",
9135                                    errors.ECODE_INVAL)
9136
9137       if not self.dest_x509_ca_pem:
9138         raise errors.OpPrereqError("Missing destination X509 CA",
9139                                    errors.ECODE_INVAL)
9140
9141   def ExpandNames(self):
9142     self._ExpandAndLockInstance()
9143
9144     # Lock all nodes for local exports
9145     if self.export_mode == constants.EXPORT_MODE_LOCAL:
9146       # FIXME: lock only instance primary and destination node
9147       #
9148       # Sad but true, for now we have do lock all nodes, as we don't know where
9149       # the previous export might be, and in this LU we search for it and
9150       # remove it from its current node. In the future we could fix this by:
9151       #  - making a tasklet to search (share-lock all), then create the new one,
9152       #    then one to remove, after
9153       #  - removing the removal operation altogether
9154       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9155
9156   def DeclareLocks(self, level):
9157     """Last minute lock declaration."""
9158     # All nodes are locked anyway, so nothing to do here.
9159
9160   def BuildHooksEnv(self):
9161     """Build hooks env.
9162
9163     This will run on the master, primary node and target node.
9164
9165     """
9166     env = {
9167       "EXPORT_MODE": self.export_mode,
9168       "EXPORT_NODE": self.op.target_node,
9169       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
9170       "SHUTDOWN_TIMEOUT": self.shutdown_timeout,
9171       # TODO: Generic function for boolean env variables
9172       "REMOVE_INSTANCE": str(bool(self.remove_instance)),
9173       }
9174
9175     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9176
9177     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
9178
9179     if self.export_mode == constants.EXPORT_MODE_LOCAL:
9180       nl.append(self.op.target_node)
9181
9182     return env, nl, nl
9183
9184   def CheckPrereq(self):
9185     """Check prerequisites.
9186
9187     This checks that the instance and node names are valid.
9188
9189     """
9190     instance_name = self.op.instance_name
9191
9192     self.instance = self.cfg.GetInstanceInfo(instance_name)
9193     assert self.instance is not None, \
9194           "Cannot retrieve locked instance %s" % self.op.instance_name
9195     _CheckNodeOnline(self, self.instance.primary_node)
9196
9197     if self.export_mode == constants.EXPORT_MODE_LOCAL:
9198       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
9199       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
9200       assert self.dst_node is not None
9201
9202       _CheckNodeOnline(self, self.dst_node.name)
9203       _CheckNodeNotDrained(self, self.dst_node.name)
9204
9205       self._cds = None
9206       self.dest_disk_info = None
9207       self.dest_x509_ca = None
9208
9209     elif self.export_mode == constants.EXPORT_MODE_REMOTE:
9210       self.dst_node = None
9211
9212       if len(self.op.target_node) != len(self.instance.disks):
9213         raise errors.OpPrereqError(("Received destination information for %s"
9214                                     " disks, but instance %s has %s disks") %
9215                                    (len(self.op.target_node), instance_name,
9216                                     len(self.instance.disks)),
9217                                    errors.ECODE_INVAL)
9218
9219       cds = _GetClusterDomainSecret()
9220
9221       # Check X509 key name
9222       try:
9223         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
9224       except (TypeError, ValueError), err:
9225         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
9226
9227       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
9228         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
9229                                    errors.ECODE_INVAL)
9230
9231       # Load and verify CA
9232       try:
9233         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
9234       except OpenSSL.crypto.Error, err:
9235         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
9236                                    (err, ), errors.ECODE_INVAL)
9237
9238       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
9239       if errcode is not None:
9240         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" % (msg, ),
9241                                    errors.ECODE_INVAL)
9242
9243       self.dest_x509_ca = cert
9244
9245       # Verify target information
9246       disk_info = []
9247       for idx, disk_data in enumerate(self.op.target_node):
9248         try:
9249           (host, port, magic) = \
9250             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
9251         except errors.GenericError, err:
9252           raise errors.OpPrereqError("Target info for disk %s: %s" % (idx, err),
9253                                      errors.ECODE_INVAL)
9254
9255         disk_info.append((host, port, magic))
9256
9257       assert len(disk_info) == len(self.op.target_node)
9258       self.dest_disk_info = disk_info
9259
9260     else:
9261       raise errors.ProgrammerError("Unhandled export mode %r" %
9262                                    self.export_mode)
9263
9264     # instance disk type verification
9265     # TODO: Implement export support for file-based disks
9266     for disk in self.instance.disks:
9267       if disk.dev_type == constants.LD_FILE:
9268         raise errors.OpPrereqError("Export not supported for instances with"
9269                                    " file-based disks", errors.ECODE_INVAL)
9270
9271   def _CleanupExports(self, feedback_fn):
9272     """Removes exports of current instance from all other nodes.
9273
9274     If an instance in a cluster with nodes A..D was exported to node C, its
9275     exports will be removed from the nodes A, B and D.
9276
9277     """
9278     assert self.export_mode != constants.EXPORT_MODE_REMOTE
9279
9280     nodelist = self.cfg.GetNodeList()
9281     nodelist.remove(self.dst_node.name)
9282
9283     # on one-node clusters nodelist will be empty after the removal
9284     # if we proceed the backup would be removed because OpQueryExports
9285     # substitutes an empty list with the full cluster node list.
9286     iname = self.instance.name
9287     if nodelist:
9288       feedback_fn("Removing old exports for instance %s" % iname)
9289       exportlist = self.rpc.call_export_list(nodelist)
9290       for node in exportlist:
9291         if exportlist[node].fail_msg:
9292           continue
9293         if iname in exportlist[node].payload:
9294           msg = self.rpc.call_export_remove(node, iname).fail_msg
9295           if msg:
9296             self.LogWarning("Could not remove older export for instance %s"
9297                             " on node %s: %s", iname, node, msg)
9298
9299   def Exec(self, feedback_fn):
9300     """Export an instance to an image in the cluster.
9301
9302     """
9303     assert self.export_mode in constants.EXPORT_MODES
9304
9305     instance = self.instance
9306     src_node = instance.primary_node
9307
9308     if self.op.shutdown:
9309       # shutdown the instance, but not the disks
9310       feedback_fn("Shutting down instance %s" % instance.name)
9311       result = self.rpc.call_instance_shutdown(src_node, instance,
9312                                                self.shutdown_timeout)
9313       # TODO: Maybe ignore failures if ignore_remove_failures is set
9314       result.Raise("Could not shutdown instance %s on"
9315                    " node %s" % (instance.name, src_node))
9316
9317     # set the disks ID correctly since call_instance_start needs the
9318     # correct drbd minor to create the symlinks
9319     for disk in instance.disks:
9320       self.cfg.SetDiskID(disk, src_node)
9321
9322     activate_disks = (not instance.admin_up)
9323
9324     if activate_disks:
9325       # Activate the instance disks if we'exporting a stopped instance
9326       feedback_fn("Activating disks for %s" % instance.name)
9327       _StartInstanceDisks(self, instance, None)
9328
9329     try:
9330       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
9331                                                      instance)
9332
9333       helper.CreateSnapshots()
9334       try:
9335         if (self.op.shutdown and instance.admin_up and
9336             not self.remove_instance):
9337           assert not activate_disks
9338           feedback_fn("Starting instance %s" % instance.name)
9339           result = self.rpc.call_instance_start(src_node, instance, None, None)
9340           msg = result.fail_msg
9341           if msg:
9342             feedback_fn("Failed to start instance: %s" % msg)
9343             _ShutdownInstanceDisks(self, instance)
9344             raise errors.OpExecError("Could not start instance: %s" % msg)
9345
9346         if self.export_mode == constants.EXPORT_MODE_LOCAL:
9347           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
9348         elif self.export_mode == constants.EXPORT_MODE_REMOTE:
9349           connect_timeout = constants.RIE_CONNECT_TIMEOUT
9350           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
9351
9352           (key_name, _, _) = self.x509_key_name
9353
9354           dest_ca_pem = \
9355             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
9356                                             self.dest_x509_ca)
9357
9358           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
9359                                                      key_name, dest_ca_pem,
9360                                                      timeouts)
9361       finally:
9362         helper.Cleanup()
9363
9364       # Check for backwards compatibility
9365       assert len(dresults) == len(instance.disks)
9366       assert compat.all(isinstance(i, bool) for i in dresults), \
9367              "Not all results are boolean: %r" % dresults
9368
9369     finally:
9370       if activate_disks:
9371         feedback_fn("Deactivating disks for %s" % instance.name)
9372         _ShutdownInstanceDisks(self, instance)
9373
9374     # Remove instance if requested
9375     if self.remove_instance:
9376       if not (compat.all(dresults) and fin_resu):
9377         feedback_fn("Not removing instance %s as parts of the export failed" %
9378                     instance.name)
9379       else:
9380         feedback_fn("Removing instance %s" % instance.name)
9381         _RemoveInstance(self, feedback_fn, instance,
9382                         self.ignore_remove_failures)
9383
9384     if self.export_mode == constants.EXPORT_MODE_LOCAL:
9385       self._CleanupExports(feedback_fn)
9386
9387     return fin_resu, dresults
9388
9389
9390 class LURemoveExport(NoHooksLU):
9391   """Remove exports related to the named instance.
9392
9393   """
9394   _OP_REQP = ["instance_name"]
9395   REQ_BGL = False
9396
9397   def ExpandNames(self):
9398     self.needed_locks = {}
9399     # We need all nodes to be locked in order for RemoveExport to work, but we
9400     # don't need to lock the instance itself, as nothing will happen to it (and
9401     # we can remove exports also for a removed instance)
9402     self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
9403
9404   def CheckPrereq(self):
9405     """Check prerequisites.
9406     """
9407     pass
9408
9409   def Exec(self, feedback_fn):
9410     """Remove any export.
9411
9412     """
9413     instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
9414     # If the instance was not found we'll try with the name that was passed in.
9415     # This will only work if it was an FQDN, though.
9416     fqdn_warn = False
9417     if not instance_name:
9418       fqdn_warn = True
9419       instance_name = self.op.instance_name
9420
9421     locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
9422     exportlist = self.rpc.call_export_list(locked_nodes)
9423     found = False
9424     for node in exportlist:
9425       msg = exportlist[node].fail_msg
9426       if msg:
9427         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
9428         continue
9429       if instance_name in exportlist[node].payload:
9430         found = True
9431         result = self.rpc.call_export_remove(node, instance_name)
9432         msg = result.fail_msg
9433         if msg:
9434           logging.error("Could not remove export for instance %s"
9435                         " on node %s: %s", instance_name, node, msg)
9436
9437     if fqdn_warn and not found:
9438       feedback_fn("Export not found. If trying to remove an export belonging"
9439                   " to a deleted instance please use its Fully Qualified"
9440                   " Domain Name.")
9441
9442
9443 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
9444   """Generic tags LU.
9445
9446   This is an abstract class which is the parent of all the other tags LUs.
9447
9448   """
9449
9450   def ExpandNames(self):
9451     self.needed_locks = {}
9452     if self.op.kind == constants.TAG_NODE:
9453       self.op.name = _ExpandNodeName(self.cfg, self.op.name)
9454       self.needed_locks[locking.LEVEL_NODE] = self.op.name
9455     elif self.op.kind == constants.TAG_INSTANCE:
9456       self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
9457       self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
9458
9459   def CheckPrereq(self):
9460     """Check prerequisites.
9461
9462     """
9463     if self.op.kind == constants.TAG_CLUSTER:
9464       self.target = self.cfg.GetClusterInfo()
9465     elif self.op.kind == constants.TAG_NODE:
9466       self.target = self.cfg.GetNodeInfo(self.op.name)
9467     elif self.op.kind == constants.TAG_INSTANCE:
9468       self.target = self.cfg.GetInstanceInfo(self.op.name)
9469     else:
9470       raise errors.OpPrereqError("Wrong tag type requested (%s)" %
9471                                  str(self.op.kind), errors.ECODE_INVAL)
9472
9473
9474 class LUGetTags(TagsLU):
9475   """Returns the tags of a given object.
9476
9477   """
9478   _OP_REQP = ["kind", "name"]
9479   REQ_BGL = False
9480
9481   def Exec(self, feedback_fn):
9482     """Returns the tag list.
9483
9484     """
9485     return list(self.target.GetTags())
9486
9487
9488 class LUSearchTags(NoHooksLU):
9489   """Searches the tags for a given pattern.
9490
9491   """
9492   _OP_REQP = ["pattern"]
9493   REQ_BGL = False
9494
9495   def ExpandNames(self):
9496     self.needed_locks = {}
9497
9498   def CheckPrereq(self):
9499     """Check prerequisites.
9500
9501     This checks the pattern passed for validity by compiling it.
9502
9503     """
9504     try:
9505       self.re = re.compile(self.op.pattern)
9506     except re.error, err:
9507       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
9508                                  (self.op.pattern, err), errors.ECODE_INVAL)
9509
9510   def Exec(self, feedback_fn):
9511     """Returns the tag list.
9512
9513     """
9514     cfg = self.cfg
9515     tgts = [("/cluster", cfg.GetClusterInfo())]
9516     ilist = cfg.GetAllInstancesInfo().values()
9517     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
9518     nlist = cfg.GetAllNodesInfo().values()
9519     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
9520     results = []
9521     for path, target in tgts:
9522       for tag in target.GetTags():
9523         if self.re.search(tag):
9524           results.append((path, tag))
9525     return results
9526
9527
9528 class LUAddTags(TagsLU):
9529   """Sets a tag on a given object.
9530
9531   """
9532   _OP_REQP = ["kind", "name", "tags"]
9533   REQ_BGL = False
9534
9535   def CheckPrereq(self):
9536     """Check prerequisites.
9537
9538     This checks the type and length of the tag name and value.
9539
9540     """
9541     TagsLU.CheckPrereq(self)
9542     for tag in self.op.tags:
9543       objects.TaggableObject.ValidateTag(tag)
9544
9545   def Exec(self, feedback_fn):
9546     """Sets the tag.
9547
9548     """
9549     try:
9550       for tag in self.op.tags:
9551         self.target.AddTag(tag)
9552     except errors.TagError, err:
9553       raise errors.OpExecError("Error while setting tag: %s" % str(err))
9554     self.cfg.Update(self.target, feedback_fn)
9555
9556
9557 class LUDelTags(TagsLU):
9558   """Delete a list of tags from a given object.
9559
9560   """
9561   _OP_REQP = ["kind", "name", "tags"]
9562   REQ_BGL = False
9563
9564   def CheckPrereq(self):
9565     """Check prerequisites.
9566
9567     This checks that we have the given tag.
9568
9569     """
9570     TagsLU.CheckPrereq(self)
9571     for tag in self.op.tags:
9572       objects.TaggableObject.ValidateTag(tag)
9573     del_tags = frozenset(self.op.tags)
9574     cur_tags = self.target.GetTags()
9575     if not del_tags <= cur_tags:
9576       diff_tags = del_tags - cur_tags
9577       diff_names = ["'%s'" % tag for tag in diff_tags]
9578       diff_names.sort()
9579       raise errors.OpPrereqError("Tag(s) %s not found" %
9580                                  (",".join(diff_names)), errors.ECODE_NOENT)
9581
9582   def Exec(self, feedback_fn):
9583     """Remove the tag from the object.
9584
9585     """
9586     for tag in self.op.tags:
9587       self.target.RemoveTag(tag)
9588     self.cfg.Update(self.target, feedback_fn)
9589
9590
9591 class LUTestDelay(NoHooksLU):
9592   """Sleep for a specified amount of time.
9593
9594   This LU sleeps on the master and/or nodes for a specified amount of
9595   time.
9596
9597   """
9598   _OP_REQP = ["duration", "on_master", "on_nodes"]
9599   REQ_BGL = False
9600
9601   def CheckArguments(self):
9602     # TODO: convert to the type system
9603     self.op.repeat = getattr(self.op, "repeat", 0)
9604     if self.op.repeat < 0:
9605       raise errors.OpPrereqError("Repetition count cannot be negative")
9606
9607   def ExpandNames(self):
9608     """Expand names and set required locks.
9609
9610     This expands the node list, if any.
9611
9612     """
9613     self.needed_locks = {}
9614     if self.op.on_nodes:
9615       # _GetWantedNodes can be used here, but is not always appropriate to use
9616       # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
9617       # more information.
9618       self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
9619       self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
9620
9621   def CheckPrereq(self):
9622     """Check prerequisites.
9623
9624     """
9625
9626   def _TestDelay(self):
9627     """Do the actual sleep.
9628
9629     """
9630     if self.op.on_master:
9631       if not utils.TestDelay(self.op.duration):
9632         raise errors.OpExecError("Error during master delay test")
9633     if self.op.on_nodes:
9634       result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
9635       for node, node_result in result.items():
9636         node_result.Raise("Failure during rpc call to node %s" % node)
9637
9638   def Exec(self, feedback_fn):
9639     """Execute the test delay opcode, with the wanted repetitions.
9640
9641     """
9642     if self.op.repeat == 0:
9643       self._TestDelay()
9644     else:
9645       top_value = self.op.repeat - 1
9646       for i in range(self.op.repeat):
9647         self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
9648         self._TestDelay()
9649
9650
9651 class IAllocator(object):
9652   """IAllocator framework.
9653
9654   An IAllocator instance has three sets of attributes:
9655     - cfg that is needed to query the cluster
9656     - input data (all members of the _KEYS class attribute are required)
9657     - four buffer attributes (in|out_data|text), that represent the
9658       input (to the external script) in text and data structure format,
9659       and the output from it, again in two formats
9660     - the result variables from the script (success, info, nodes) for
9661       easy usage
9662
9663   """
9664   # pylint: disable-msg=R0902
9665   # lots of instance attributes
9666   _ALLO_KEYS = [
9667     "name", "mem_size", "disks", "disk_template",
9668     "os", "tags", "nics", "vcpus", "hypervisor",
9669     ]
9670   _RELO_KEYS = [
9671     "name", "relocate_from",
9672     ]
9673   _EVAC_KEYS = [
9674     "evac_nodes",
9675     ]
9676
9677   def __init__(self, cfg, rpc, mode, **kwargs):
9678     self.cfg = cfg
9679     self.rpc = rpc
9680     # init buffer variables
9681     self.in_text = self.out_text = self.in_data = self.out_data = None
9682     # init all input fields so that pylint is happy
9683     self.mode = mode
9684     self.mem_size = self.disks = self.disk_template = None
9685     self.os = self.tags = self.nics = self.vcpus = None
9686     self.hypervisor = None
9687     self.relocate_from = None
9688     self.name = None
9689     self.evac_nodes = None
9690     # computed fields
9691     self.required_nodes = None
9692     # init result fields
9693     self.success = self.info = self.result = None
9694     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9695       keyset = self._ALLO_KEYS
9696       fn = self._AddNewInstance
9697     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9698       keyset = self._RELO_KEYS
9699       fn = self._AddRelocateInstance
9700     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9701       keyset = self._EVAC_KEYS
9702       fn = self._AddEvacuateNodes
9703     else:
9704       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
9705                                    " IAllocator" % self.mode)
9706     for key in kwargs:
9707       if key not in keyset:
9708         raise errors.ProgrammerError("Invalid input parameter '%s' to"
9709                                      " IAllocator" % key)
9710       setattr(self, key, kwargs[key])
9711
9712     for key in keyset:
9713       if key not in kwargs:
9714         raise errors.ProgrammerError("Missing input parameter '%s' to"
9715                                      " IAllocator" % key)
9716     self._BuildInputData(fn)
9717
9718   def _ComputeClusterData(self):
9719     """Compute the generic allocator input data.
9720
9721     This is the data that is independent of the actual operation.
9722
9723     """
9724     cfg = self.cfg
9725     cluster_info = cfg.GetClusterInfo()
9726     # cluster data
9727     data = {
9728       "version": constants.IALLOCATOR_VERSION,
9729       "cluster_name": cfg.GetClusterName(),
9730       "cluster_tags": list(cluster_info.GetTags()),
9731       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
9732       # we don't have job IDs
9733       }
9734     iinfo = cfg.GetAllInstancesInfo().values()
9735     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
9736
9737     # node data
9738     node_results = {}
9739     node_list = cfg.GetNodeList()
9740
9741     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
9742       hypervisor_name = self.hypervisor
9743     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
9744       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
9745     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
9746       hypervisor_name = cluster_info.enabled_hypervisors[0]
9747
9748     node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
9749                                         hypervisor_name)
9750     node_iinfo = \
9751       self.rpc.call_all_instances_info(node_list,
9752                                        cluster_info.enabled_hypervisors)
9753     for nname, nresult in node_data.items():
9754       # first fill in static (config-based) values
9755       ninfo = cfg.GetNodeInfo(nname)
9756       pnr = {
9757         "tags": list(ninfo.GetTags()),
9758         "primary_ip": ninfo.primary_ip,
9759         "secondary_ip": ninfo.secondary_ip,
9760         "offline": ninfo.offline,
9761         "drained": ninfo.drained,
9762         "master_candidate": ninfo.master_candidate,
9763         }
9764
9765       if not (ninfo.offline or ninfo.drained):
9766         nresult.Raise("Can't get data for node %s" % nname)
9767         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
9768                                 nname)
9769         remote_info = nresult.payload
9770
9771         for attr in ['memory_total', 'memory_free', 'memory_dom0',
9772                      'vg_size', 'vg_free', 'cpu_total']:
9773           if attr not in remote_info:
9774             raise errors.OpExecError("Node '%s' didn't return attribute"
9775                                      " '%s'" % (nname, attr))
9776           if not isinstance(remote_info[attr], int):
9777             raise errors.OpExecError("Node '%s' returned invalid value"
9778                                      " for '%s': %s" %
9779                                      (nname, attr, remote_info[attr]))
9780         # compute memory used by primary instances
9781         i_p_mem = i_p_up_mem = 0
9782         for iinfo, beinfo in i_list:
9783           if iinfo.primary_node == nname:
9784             i_p_mem += beinfo[constants.BE_MEMORY]
9785             if iinfo.name not in node_iinfo[nname].payload:
9786               i_used_mem = 0
9787             else:
9788               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
9789             i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
9790             remote_info['memory_free'] -= max(0, i_mem_diff)
9791
9792             if iinfo.admin_up:
9793               i_p_up_mem += beinfo[constants.BE_MEMORY]
9794
9795         # compute memory used by instances
9796         pnr_dyn = {
9797           "total_memory": remote_info['memory_total'],
9798           "reserved_memory": remote_info['memory_dom0'],
9799           "free_memory": remote_info['memory_free'],
9800           "total_disk": remote_info['vg_size'],
9801           "free_disk": remote_info['vg_free'],
9802           "total_cpus": remote_info['cpu_total'],
9803           "i_pri_memory": i_p_mem,
9804           "i_pri_up_memory": i_p_up_mem,
9805           }
9806         pnr.update(pnr_dyn)
9807
9808       node_results[nname] = pnr
9809     data["nodes"] = node_results
9810
9811     # instance data
9812     instance_data = {}
9813     for iinfo, beinfo in i_list:
9814       nic_data = []
9815       for nic in iinfo.nics:
9816         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
9817         nic_dict = {"mac": nic.mac,
9818                     "ip": nic.ip,
9819                     "mode": filled_params[constants.NIC_MODE],
9820                     "link": filled_params[constants.NIC_LINK],
9821                    }
9822         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
9823           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
9824         nic_data.append(nic_dict)
9825       pir = {
9826         "tags": list(iinfo.GetTags()),
9827         "admin_up": iinfo.admin_up,
9828         "vcpus": beinfo[constants.BE_VCPUS],
9829         "memory": beinfo[constants.BE_MEMORY],
9830         "os": iinfo.os,
9831         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
9832         "nics": nic_data,
9833         "disks": [{"size": dsk.size, "mode": dsk.mode} for dsk in iinfo.disks],
9834         "disk_template": iinfo.disk_template,
9835         "hypervisor": iinfo.hypervisor,
9836         }
9837       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
9838                                                  pir["disks"])
9839       instance_data[iinfo.name] = pir
9840
9841     data["instances"] = instance_data
9842
9843     self.in_data = data
9844
9845   def _AddNewInstance(self):
9846     """Add new instance data to allocator structure.
9847
9848     This in combination with _AllocatorGetClusterData will create the
9849     correct structure needed as input for the allocator.
9850
9851     The checks for the completeness of the opcode must have already been
9852     done.
9853
9854     """
9855     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
9856
9857     if self.disk_template in constants.DTS_NET_MIRROR:
9858       self.required_nodes = 2
9859     else:
9860       self.required_nodes = 1
9861     request = {
9862       "name": self.name,
9863       "disk_template": self.disk_template,
9864       "tags": self.tags,
9865       "os": self.os,
9866       "vcpus": self.vcpus,
9867       "memory": self.mem_size,
9868       "disks": self.disks,
9869       "disk_space_total": disk_space,
9870       "nics": self.nics,
9871       "required_nodes": self.required_nodes,
9872       }
9873     return request
9874
9875   def _AddRelocateInstance(self):
9876     """Add relocate instance data to allocator structure.
9877
9878     This in combination with _IAllocatorGetClusterData will create the
9879     correct structure needed as input for the allocator.
9880
9881     The checks for the completeness of the opcode must have already been
9882     done.
9883
9884     """
9885     instance = self.cfg.GetInstanceInfo(self.name)
9886     if instance is None:
9887       raise errors.ProgrammerError("Unknown instance '%s' passed to"
9888                                    " IAllocator" % self.name)
9889
9890     if instance.disk_template not in constants.DTS_NET_MIRROR:
9891       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
9892                                  errors.ECODE_INVAL)
9893
9894     if len(instance.secondary_nodes) != 1:
9895       raise errors.OpPrereqError("Instance has not exactly one secondary node",
9896                                  errors.ECODE_STATE)
9897
9898     self.required_nodes = 1
9899     disk_sizes = [{'size': disk.size} for disk in instance.disks]
9900     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
9901
9902     request = {
9903       "name": self.name,
9904       "disk_space_total": disk_space,
9905       "required_nodes": self.required_nodes,
9906       "relocate_from": self.relocate_from,
9907       }
9908     return request
9909
9910   def _AddEvacuateNodes(self):
9911     """Add evacuate nodes data to allocator structure.
9912
9913     """
9914     request = {
9915       "evac_nodes": self.evac_nodes
9916       }
9917     return request
9918
9919   def _BuildInputData(self, fn):
9920     """Build input data structures.
9921
9922     """
9923     self._ComputeClusterData()
9924
9925     request = fn()
9926     request["type"] = self.mode
9927     self.in_data["request"] = request
9928
9929     self.in_text = serializer.Dump(self.in_data)
9930
9931   def Run(self, name, validate=True, call_fn=None):
9932     """Run an instance allocator and return the results.
9933
9934     """
9935     if call_fn is None:
9936       call_fn = self.rpc.call_iallocator_runner
9937
9938     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
9939     result.Raise("Failure while running the iallocator script")
9940
9941     self.out_text = result.payload
9942     if validate:
9943       self._ValidateResult()
9944
9945   def _ValidateResult(self):
9946     """Process the allocator results.
9947
9948     This will process and if successful save the result in
9949     self.out_data and the other parameters.
9950
9951     """
9952     try:
9953       rdict = serializer.Load(self.out_text)
9954     except Exception, err:
9955       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
9956
9957     if not isinstance(rdict, dict):
9958       raise errors.OpExecError("Can't parse iallocator results: not a dict")
9959
9960     # TODO: remove backwards compatiblity in later versions
9961     if "nodes" in rdict and "result" not in rdict:
9962       rdict["result"] = rdict["nodes"]
9963       del rdict["nodes"]
9964
9965     for key in "success", "info", "result":
9966       if key not in rdict:
9967         raise errors.OpExecError("Can't parse iallocator results:"
9968                                  " missing key '%s'" % key)
9969       setattr(self, key, rdict[key])
9970
9971     if not isinstance(rdict["result"], list):
9972       raise errors.OpExecError("Can't parse iallocator results: 'result' key"
9973                                " is not a list")
9974     self.out_data = rdict
9975
9976
9977 class LUTestAllocator(NoHooksLU):
9978   """Run allocator tests.
9979
9980   This LU runs the allocator tests
9981
9982   """
9983   _OP_REQP = ["direction", "mode", "name"]
9984
9985   def CheckPrereq(self):
9986     """Check prerequisites.
9987
9988     This checks the opcode parameters depending on the director and mode test.
9989
9990     """
9991     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
9992       for attr in ["name", "mem_size", "disks", "disk_template",
9993                    "os", "tags", "nics", "vcpus"]:
9994         if not hasattr(self.op, attr):
9995           raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
9996                                      attr, errors.ECODE_INVAL)
9997       iname = self.cfg.ExpandInstanceName(self.op.name)
9998       if iname is not None:
9999         raise errors.OpPrereqError("Instance '%s' already in the cluster" %
10000                                    iname, errors.ECODE_EXISTS)
10001       if not isinstance(self.op.nics, list):
10002         raise errors.OpPrereqError("Invalid parameter 'nics'",
10003                                    errors.ECODE_INVAL)
10004       for row in self.op.nics:
10005         if (not isinstance(row, dict) or
10006             "mac" not in row or
10007             "ip" not in row or
10008             "bridge" not in row):
10009           raise errors.OpPrereqError("Invalid contents of the 'nics'"
10010                                      " parameter", errors.ECODE_INVAL)
10011       if not isinstance(self.op.disks, list):
10012         raise errors.OpPrereqError("Invalid parameter 'disks'",
10013                                    errors.ECODE_INVAL)
10014       for row in self.op.disks:
10015         if (not isinstance(row, dict) or
10016             "size" not in row or
10017             not isinstance(row["size"], int) or
10018             "mode" not in row or
10019             row["mode"] not in ['r', 'w']):
10020           raise errors.OpPrereqError("Invalid contents of the 'disks'"
10021                                      " parameter", errors.ECODE_INVAL)
10022       if not hasattr(self.op, "hypervisor") or self.op.hypervisor is None:
10023         self.op.hypervisor = self.cfg.GetHypervisorType()
10024     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10025       if not hasattr(self.op, "name"):
10026         raise errors.OpPrereqError("Missing attribute 'name' on opcode input",
10027                                    errors.ECODE_INVAL)
10028       fname = _ExpandInstanceName(self.cfg, self.op.name)
10029       self.op.name = fname
10030       self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
10031     elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10032       if not hasattr(self.op, "evac_nodes"):
10033         raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
10034                                    " opcode input", errors.ECODE_INVAL)
10035     else:
10036       raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
10037                                  self.op.mode, errors.ECODE_INVAL)
10038
10039     if self.op.direction == constants.IALLOCATOR_DIR_OUT:
10040       if not hasattr(self.op, "allocator") or self.op.allocator is None:
10041         raise errors.OpPrereqError("Missing allocator name",
10042                                    errors.ECODE_INVAL)
10043     elif self.op.direction != constants.IALLOCATOR_DIR_IN:
10044       raise errors.OpPrereqError("Wrong allocator test '%s'" %
10045                                  self.op.direction, errors.ECODE_INVAL)
10046
10047   def Exec(self, feedback_fn):
10048     """Run the allocator test.
10049
10050     """
10051     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
10052       ial = IAllocator(self.cfg, self.rpc,
10053                        mode=self.op.mode,
10054                        name=self.op.name,
10055                        mem_size=self.op.mem_size,
10056                        disks=self.op.disks,
10057                        disk_template=self.op.disk_template,
10058                        os=self.op.os,
10059                        tags=self.op.tags,
10060                        nics=self.op.nics,
10061                        vcpus=self.op.vcpus,
10062                        hypervisor=self.op.hypervisor,
10063                        )
10064     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
10065       ial = IAllocator(self.cfg, self.rpc,
10066                        mode=self.op.mode,
10067                        name=self.op.name,
10068                        relocate_from=list(self.relocate_from),
10069                        )
10070     elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
10071       ial = IAllocator(self.cfg, self.rpc,
10072                        mode=self.op.mode,
10073                        evac_nodes=self.op.evac_nodes)
10074     else:
10075       raise errors.ProgrammerError("Uncatched mode %s in"
10076                                    " LUTestAllocator.Exec", self.op.mode)
10077
10078     if self.op.direction == constants.IALLOCATOR_DIR_IN:
10079       result = ial.in_text
10080     else:
10081       ial.Run(self.op.allocator, validate=False)
10082       result = ial.out_text
10083     return result