code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable-msg=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay to many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import platform
  36 import logging
  37 import copy
  38 import OpenSSL
  39 import socket
  40 import tempfile
  41 import shutil
  42 import itertools
  43
  44 from ganeti import ssh
  45 from ganeti import utils
  46 from ganeti import errors
  47 from ganeti import hypervisor
  48 from ganeti import locking
  49 from ganeti import constants
  50 from ganeti import objects
  51 from ganeti import serializer
  52 from ganeti import ssconf
  53 from ganeti import uidpool
  54 from ganeti import compat
  55 from ganeti import masterd
  56 from ganeti import netutils
  57 from ganeti import query
  58 from ganeti import qlang
  59 from ganeti import opcodes
  60
  61 import ganeti.masterd.instance # pylint: disable-msg=W0611
  62
  63
  64 def _SupportsOob(cfg, node):
  65   """Tells if node supports OOB.
  66
  67   @type cfg: L{config.ConfigWriter}
  68   @param cfg: The cluster configuration
  69   @type node: L{objects.Node}
  70   @param node: The node
  71   @return: The OOB script if supported or an empty string otherwise
  72
  73   """
  74   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
  75
  76
  77 class ResultWithJobs:
  78   """Data container for LU results with jobs.
  79
  80   Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  81   by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  82   contained in the C{jobs} attribute and include the job IDs in the opcode
  83   result.
  84
  85   """
  86   def __init__(self, jobs, **kwargs):
  87     """Initializes this class.
  88
  89     Additional return values can be specified as keyword arguments.
  90
  91     @type jobs: list of lists of L{opcode.OpCode}
  92     @param jobs: A list of lists of opcode objects
  93
  94     """
  95     self.jobs = jobs
  96     self.other = kwargs
  97
  98
  99 class LogicalUnit(object):
 100   """Logical Unit base class.
 101
 102   Subclasses must follow these rules:
 103     - implement ExpandNames
 104     - implement CheckPrereq (except when tasklets are used)
 105     - implement Exec (except when tasklets are used)
 106     - implement BuildHooksEnv
 107     - implement BuildHooksNodes
 108     - redefine HPATH and HTYPE
 109     - optionally redefine their run requirements:
 110         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
 111
 112   Note that all commands require root permissions.
 113
 114   @ivar dry_run_result: the value (if any) that will be returned to the caller
 115       in dry-run mode (signalled by opcode dry_run parameter)
 116
 117   """
 118   HPATH = None
 119   HTYPE = None
 120   REQ_BGL = True
 121
 122   def __init__(self, processor, op, context, rpc):
 123     """Constructor for LogicalUnit.
 124
 125     This needs to be overridden in derived classes in order to check op
 126     validity.
 127
 128     """
 129     self.proc = processor
 130     self.op = op
 131     self.cfg = context.cfg
 132     self.glm = context.glm
 133     self.context = context
 134     self.rpc = rpc
 135     # Dicts used to declare locking needs to mcpu
 136     self.needed_locks = None
 137     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 138     self.add_locks = {}
 139     self.remove_locks = {}
 140     # Used to force good behavior when calling helper functions
 141     self.recalculate_locks = {}
 142     # logging
 143     self.Log = processor.Log # pylint: disable-msg=C0103
 144     self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
 145     self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
 146     self.LogStep = processor.LogStep # pylint: disable-msg=C0103
 147     # support for dry-run
 148     self.dry_run_result = None
 149     # support for generic debug attribute
 150     if (not hasattr(self.op, "debug_level") or
 151         not isinstance(self.op.debug_level, int)):
 152       self.op.debug_level = 0
 153
 154     # Tasklets
 155     self.tasklets = None
 156
 157     # Validate opcode parameters and set defaults
 158     self.op.Validate(True)
 159
 160     self.CheckArguments()
 161
 162   def CheckArguments(self):
 163     """Check syntactic validity for the opcode arguments.
 164
 165     This method is for doing a simple syntactic check and ensure
 166     validity of opcode parameters, without any cluster-related
 167     checks. While the same can be accomplished in ExpandNames and/or
 168     CheckPrereq, doing these separate is better because:
 169
 170       - ExpandNames is left as as purely a lock-related function
 171       - CheckPrereq is run after we have acquired locks (and possible
 172         waited for them)
 173
 174     The function is allowed to change the self.op attribute so that
 175     later methods can no longer worry about missing parameters.
 176
 177     """
 178     pass
 179
 180   def ExpandNames(self):
 181     """Expand names for this LU.
 182
 183     This method is called before starting to execute the opcode, and it should
 184     update all the parameters of the opcode to their canonical form (e.g. a
 185     short node name must be fully expanded after this method has successfully
 186     completed). This way locking, hooks, logging, etc. can work correctly.
 187
 188     LUs which implement this method must also populate the self.needed_locks
 189     member, as a dict with lock levels as keys, and a list of needed lock names
 190     as values. Rules:
 191
 192       - use an empty dict if you don't need any lock
 193       - if you don't need any lock at a particular level omit that level
 194       - don't put anything for the BGL level
 195       - if you want all locks at a level use locking.ALL_SET as a value
 196
 197     If you need to share locks (rather than acquire them exclusively) at one
 198     level you can modify self.share_locks, setting a true value (usually 1) for
 199     that level. By default locks are not shared.
 200
 201     This function can also define a list of tasklets, which then will be
 202     executed in order instead of the usual LU-level CheckPrereq and Exec
 203     functions, if those are not defined by the LU.
 204
 205     Examples::
 206
 207       # Acquire all nodes and one instance
 208       self.needed_locks = {
 209         locking.LEVEL_NODE: locking.ALL_SET,
 210         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 211       }
 212       # Acquire just two nodes
 213       self.needed_locks = {
 214         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 215       }
 216       # Acquire no locks
 217       self.needed_locks = {} # No, you can't leave it to the default value None
 218
 219     """
 220     # The implementation of this method is mandatory only if the new LU is
 221     # concurrent, so that old LUs don't need to be changed all at the same
 222     # time.
 223     if self.REQ_BGL:
 224       self.needed_locks = {} # Exclusive LUs don't need locks.
 225     else:
 226       raise NotImplementedError
 227
 228   def DeclareLocks(self, level):
 229     """Declare LU locking needs for a level
 230
 231     While most LUs can just declare their locking needs at ExpandNames time,
 232     sometimes there's the need to calculate some locks after having acquired
 233     the ones before. This function is called just before acquiring locks at a
 234     particular level, but after acquiring the ones at lower levels, and permits
 235     such calculations. It can be used to modify self.needed_locks, and by
 236     default it does nothing.
 237
 238     This function is only called if you have something already set in
 239     self.needed_locks for the level.
 240
 241     @param level: Locking level which is going to be locked
 242     @type level: member of ganeti.locking.LEVELS
 243
 244     """
 245
 246   def CheckPrereq(self):
 247     """Check prerequisites for this LU.
 248
 249     This method should check that the prerequisites for the execution
 250     of this LU are fulfilled. It can do internode communication, but
 251     it should be idempotent - no cluster or system changes are
 252     allowed.
 253
 254     The method should raise errors.OpPrereqError in case something is
 255     not fulfilled. Its return value is ignored.
 256
 257     This method should also update all the parameters of the opcode to
 258     their canonical form if it hasn't been done by ExpandNames before.
 259
 260     """
 261     if self.tasklets is not None:
 262       for (idx, tl) in enumerate(self.tasklets):
 263         logging.debug("Checking prerequisites for tasklet %s/%s",
 264                       idx + 1, len(self.tasklets))
 265         tl.CheckPrereq()
 266     else:
 267       pass
 268
 269   def Exec(self, feedback_fn):
 270     """Execute the LU.
 271
 272     This method should implement the actual work. It should raise
 273     errors.OpExecError for failures that are somewhat dealt with in
 274     code, or expected.
 275
 276     """
 277     if self.tasklets is not None:
 278       for (idx, tl) in enumerate(self.tasklets):
 279         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 280         tl.Exec(feedback_fn)
 281     else:
 282       raise NotImplementedError
 283
 284   def BuildHooksEnv(self):
 285     """Build hooks environment for this LU.
 286
 287     @rtype: dict
 288     @return: Dictionary containing the environment that will be used for
 289       running the hooks for this LU. The keys of the dict must not be prefixed
 290       with "GANETI_"--that'll be added by the hooks runner. The hooks runner
 291       will extend the environment with additional variables. If no environment
 292       should be defined, an empty dictionary should be returned (not C{None}).
 293     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 294       will not be called.
 295
 296     """
 297     raise NotImplementedError
 298
 299   def BuildHooksNodes(self):
 300     """Build list of nodes to run LU's hooks.
 301
 302     @rtype: tuple; (list, list)
 303     @return: Tuple containing a list of node names on which the hook
 304       should run before the execution and a list of node names on which the
 305       hook should run after the execution. No nodes should be returned as an
 306       empty list (and not None).
 307     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 308       will not be called.
 309
 310     """
 311     raise NotImplementedError
 312
 313   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 314     """Notify the LU about the results of its hooks.
 315
 316     This method is called every time a hooks phase is executed, and notifies
 317     the Logical Unit about the hooks' result. The LU can then use it to alter
 318     its result based on the hooks.  By default the method does nothing and the
 319     previous result is passed back unchanged but any LU can define it if it
 320     wants to use the local cluster hook-scripts somehow.
 321
 322     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 323         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 324     @param hook_results: the results of the multi-node hooks rpc call
 325     @param feedback_fn: function used send feedback back to the caller
 326     @param lu_result: the previous Exec result this LU had, or None
 327         in the PRE phase
 328     @return: the new Exec result, based on the previous result
 329         and hook results
 330
 331     """
 332     # API must be kept, thus we ignore the unused argument and could
 333     # be a function warnings
 334     # pylint: disable-msg=W0613,R0201
 335     return lu_result
 336
 337   def _ExpandAndLockInstance(self):
 338     """Helper function to expand and lock an instance.
 339
 340     Many LUs that work on an instance take its name in self.op.instance_name
 341     and need to expand it and then declare the expanded name for locking. This
 342     function does it, and then updates self.op.instance_name to the expanded
 343     name. It also initializes needed_locks as a dict, if this hasn't been done
 344     before.
 345
 346     """
 347     if self.needed_locks is None:
 348       self.needed_locks = {}
 349     else:
 350       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 351         "_ExpandAndLockInstance called with instance-level locks set"
 352     self.op.instance_name = _ExpandInstanceName(self.cfg,
 353                                                 self.op.instance_name)
 354     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 355
 356   def _LockInstancesNodes(self, primary_only=False):
 357     """Helper function to declare instances' nodes for locking.
 358
 359     This function should be called after locking one or more instances to lock
 360     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 361     with all primary or secondary nodes for instances already locked and
 362     present in self.needed_locks[locking.LEVEL_INSTANCE].
 363
 364     It should be called from DeclareLocks, and for safety only works if
 365     self.recalculate_locks[locking.LEVEL_NODE] is set.
 366
 367     In the future it may grow parameters to just lock some instance's nodes, or
 368     to just lock primaries or secondary nodes, if needed.
 369
 370     If should be called in DeclareLocks in a way similar to::
 371
 372       if level == locking.LEVEL_NODE:
 373         self._LockInstancesNodes()
 374
 375     @type primary_only: boolean
 376     @param primary_only: only lock primary nodes of locked instances
 377
 378     """
 379     assert locking.LEVEL_NODE in self.recalculate_locks, \
 380       "_LockInstancesNodes helper function called with no nodes to recalculate"
 381
 382     # TODO: check if we're really been called with the instance locks held
 383
 384     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 385     # future we might want to have different behaviors depending on the value
 386     # of self.recalculate_locks[locking.LEVEL_NODE]
 387     wanted_nodes = []
 388     for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
 389       instance = self.context.cfg.GetInstanceInfo(instance_name)
 390       wanted_nodes.append(instance.primary_node)
 391       if not primary_only:
 392         wanted_nodes.extend(instance.secondary_nodes)
 393
 394     if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
 395       self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
 396     elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
 397       self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
 398
 399     del self.recalculate_locks[locking.LEVEL_NODE]
 400
 401
 402 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
 403   """Simple LU which runs no hooks.
 404
 405   This LU is intended as a parent for other LogicalUnits which will
 406   run no hooks, in order to reduce duplicate code.
 407
 408   """
 409   HPATH = None
 410   HTYPE = None
 411
 412   def BuildHooksEnv(self):
 413     """Empty BuildHooksEnv for NoHooksLu.
 414
 415     This just raises an error.
 416
 417     """
 418     raise AssertionError("BuildHooksEnv called for NoHooksLUs")
 419
 420   def BuildHooksNodes(self):
 421     """Empty BuildHooksNodes for NoHooksLU.
 422
 423     """
 424     raise AssertionError("BuildHooksNodes called for NoHooksLU")
 425
 426
 427 class Tasklet:
 428   """Tasklet base class.
 429
 430   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 431   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 432   tasklets know nothing about locks.
 433
 434   Subclasses must follow these rules:
 435     - Implement CheckPrereq
 436     - Implement Exec
 437
 438   """
 439   def __init__(self, lu):
 440     self.lu = lu
 441
 442     # Shortcuts
 443     self.cfg = lu.cfg
 444     self.rpc = lu.rpc
 445
 446   def CheckPrereq(self):
 447     """Check prerequisites for this tasklets.
 448
 449     This method should check whether the prerequisites for the execution of
 450     this tasklet are fulfilled. It can do internode communication, but it
 451     should be idempotent - no cluster or system changes are allowed.
 452
 453     The method should raise errors.OpPrereqError in case something is not
 454     fulfilled. Its return value is ignored.
 455
 456     This method should also update all parameters to their canonical form if it
 457     hasn't been done before.
 458
 459     """
 460     pass
 461
 462   def Exec(self, feedback_fn):
 463     """Execute the tasklet.
 464
 465     This method should implement the actual work. It should raise
 466     errors.OpExecError for failures that are somewhat dealt with in code, or
 467     expected.
 468
 469     """
 470     raise NotImplementedError
 471
 472
 473 class _QueryBase:
 474   """Base for query utility classes.
 475
 476   """
 477   #: Attribute holding field definitions
 478   FIELDS = None
 479
 480   def __init__(self, filter_, fields, use_locking):
 481     """Initializes this class.
 482
 483     """
 484     self.use_locking = use_locking
 485
 486     self.query = query.Query(self.FIELDS, fields, filter_=filter_,
 487                              namefield="name")
 488     self.requested_data = self.query.RequestedData()
 489     self.names = self.query.RequestedNames()
 490
 491     # Sort only if no names were requested
 492     self.sort_by_name = not self.names
 493
 494     self.do_locking = None
 495     self.wanted = None
 496
 497   def _GetNames(self, lu, all_names, lock_level):
 498     """Helper function to determine names asked for in the query.
 499
 500     """
 501     if self.do_locking:
 502       names = lu.glm.list_owned(lock_level)
 503     else:
 504       names = all_names
 505
 506     if self.wanted == locking.ALL_SET:
 507       assert not self.names
 508       # caller didn't specify names, so ordering is not important
 509       return utils.NiceSort(names)
 510
 511     # caller specified names and we must keep the same order
 512     assert self.names
 513     assert not self.do_locking or lu.glm.is_owned(lock_level)
 514
 515     missing = set(self.wanted).difference(names)
 516     if missing:
 517       raise errors.OpExecError("Some items were removed before retrieving"
 518                                " their data: %s" % missing)
 519
 520     # Return expanded names
 521     return self.wanted
 522
 523   def ExpandNames(self, lu):
 524     """Expand names for this query.
 525
 526     See L{LogicalUnit.ExpandNames}.
 527
 528     """
 529     raise NotImplementedError()
 530
 531   def DeclareLocks(self, lu, level):
 532     """Declare locks for this query.
 533
 534     See L{LogicalUnit.DeclareLocks}.
 535
 536     """
 537     raise NotImplementedError()
 538
 539   def _GetQueryData(self, lu):
 540     """Collects all data for this query.
 541
 542     @return: Query data object
 543
 544     """
 545     raise NotImplementedError()
 546
 547   def NewStyleQuery(self, lu):
 548     """Collect data and execute query.
 549
 550     """
 551     return query.GetQueryResponse(self.query, self._GetQueryData(lu),
 552                                   sort_by_name=self.sort_by_name)
 553
 554   def OldStyleQuery(self, lu):
 555     """Collect data and execute query.
 556
 557     """
 558     return self.query.OldStyleQuery(self._GetQueryData(lu),
 559                                     sort_by_name=self.sort_by_name)
 560
 561
 562 def _GetWantedNodes(lu, nodes):
 563   """Returns list of checked and expanded node names.
 564
 565   @type lu: L{LogicalUnit}
 566   @param lu: the logical unit on whose behalf we execute
 567   @type nodes: list
 568   @param nodes: list of node names or None for all nodes
 569   @rtype: list
 570   @return: the list of nodes, sorted
 571   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 572
 573   """
 574   if nodes:
 575     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 576
 577   return utils.NiceSort(lu.cfg.GetNodeList())
 578
 579
 580 def _GetWantedInstances(lu, instances):
 581   """Returns list of checked and expanded instance names.
 582
 583   @type lu: L{LogicalUnit}
 584   @param lu: the logical unit on whose behalf we execute
 585   @type instances: list
 586   @param instances: list of instance names or None for all instances
 587   @rtype: list
 588   @return: the list of instances, sorted
 589   @raise errors.OpPrereqError: if the instances parameter is wrong type
 590   @raise errors.OpPrereqError: if any of the passed instances is not found
 591
 592   """
 593   if instances:
 594     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 595   else:
 596     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 597   return wanted
 598
 599
 600 def _GetUpdatedParams(old_params, update_dict,
 601                       use_default=True, use_none=False):
 602   """Return the new version of a parameter dictionary.
 603
 604   @type old_params: dict
 605   @param old_params: old parameters
 606   @type update_dict: dict
 607   @param update_dict: dict containing new parameter values, or
 608       constants.VALUE_DEFAULT to reset the parameter to its default
 609       value
 610   @param use_default: boolean
 611   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 612       values as 'to be deleted' values
 613   @param use_none: boolean
 614   @type use_none: whether to recognise C{None} values as 'to be
 615       deleted' values
 616   @rtype: dict
 617   @return: the new parameter dictionary
 618
 619   """
 620   params_copy = copy.deepcopy(old_params)
 621   for key, val in update_dict.iteritems():
 622     if ((use_default and val == constants.VALUE_DEFAULT) or
 623         (use_none and val is None)):
 624       try:
 625         del params_copy[key]
 626       except KeyError:
 627         pass
 628     else:
 629       params_copy[key] = val
 630   return params_copy
 631
 632
 633 def _ReleaseLocks(lu, level, names=None, keep=None):
 634   """Releases locks owned by an LU.
 635
 636   @type lu: L{LogicalUnit}
 637   @param level: Lock level
 638   @type names: list or None
 639   @param names: Names of locks to release
 640   @type keep: list or None
 641   @param keep: Names of locks to retain
 642
 643   """
 644   assert not (keep is not None and names is not None), \
 645          "Only one of the 'names' and the 'keep' parameters can be given"
 646
 647   if names is not None:
 648     should_release = names.__contains__
 649   elif keep:
 650     should_release = lambda name: name not in keep
 651   else:
 652     should_release = None
 653
 654   if should_release:
 655     retain = []
 656     release = []
 657
 658     # Determine which locks to release
 659     for name in lu.glm.list_owned(level):
 660       if should_release(name):
 661         release.append(name)
 662       else:
 663         retain.append(name)
 664
 665     assert len(lu.glm.list_owned(level)) == (len(retain) + len(release))
 666
 667     # Release just some locks
 668     lu.glm.release(level, names=release)
 669
 670     assert frozenset(lu.glm.list_owned(level)) == frozenset(retain)
 671   else:
 672     # Release everything
 673     lu.glm.release(level)
 674
 675     assert not lu.glm.is_owned(level), "No locks should be owned"
 676
 677
 678 def _RunPostHook(lu, node_name):
 679   """Runs the post-hook for an opcode on a single node.
 680
 681   """
 682   hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
 683   try:
 684     hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
 685   except:
 686     # pylint: disable-msg=W0702
 687     lu.LogWarning("Errors occurred running hooks on %s" % node_name)
 688
 689
 690 def _CheckOutputFields(static, dynamic, selected):
 691   """Checks whether all selected fields are valid.
 692
 693   @type static: L{utils.FieldSet}
 694   @param static: static fields set
 695   @type dynamic: L{utils.FieldSet}
 696   @param dynamic: dynamic fields set
 697
 698   """
 699   f = utils.FieldSet()
 700   f.Extend(static)
 701   f.Extend(dynamic)
 702
 703   delta = f.NonMatching(selected)
 704   if delta:
 705     raise errors.OpPrereqError("Unknown output fields selected: %s"
 706                                % ",".join(delta), errors.ECODE_INVAL)
 707
 708
 709 def _CheckGlobalHvParams(params):
 710   """Validates that given hypervisor params are not global ones.
 711
 712   This will ensure that instances don't get customised versions of
 713   global params.
 714
 715   """
 716   used_globals = constants.HVC_GLOBALS.intersection(params)
 717   if used_globals:
 718     msg = ("The following hypervisor parameters are global and cannot"
 719            " be customized at instance level, please modify them at"
 720            " cluster level: %s" % utils.CommaJoin(used_globals))
 721     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 722
 723
 724 def _CheckNodeOnline(lu, node, msg=None):
 725   """Ensure that a given node is online.
 726
 727   @param lu: the LU on behalf of which we make the check
 728   @param node: the node to check
 729   @param msg: if passed, should be a message to replace the default one
 730   @raise errors.OpPrereqError: if the node is offline
 731
 732   """
 733   if msg is None:
 734     msg = "Can't use offline node"
 735   if lu.cfg.GetNodeInfo(node).offline:
 736     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
 737
 738
 739 def _CheckNodeNotDrained(lu, node):
 740   """Ensure that a given node is not drained.
 741
 742   @param lu: the LU on behalf of which we make the check
 743   @param node: the node to check
 744   @raise errors.OpPrereqError: if the node is drained
 745
 746   """
 747   if lu.cfg.GetNodeInfo(node).drained:
 748     raise errors.OpPrereqError("Can't use drained node %s" % node,
 749                                errors.ECODE_STATE)
 750
 751
 752 def _CheckNodeVmCapable(lu, node):
 753   """Ensure that a given node is vm capable.
 754
 755   @param lu: the LU on behalf of which we make the check
 756   @param node: the node to check
 757   @raise errors.OpPrereqError: if the node is not vm capable
 758
 759   """
 760   if not lu.cfg.GetNodeInfo(node).vm_capable:
 761     raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
 762                                errors.ECODE_STATE)
 763
 764
 765 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 766   """Ensure that a node supports a given OS.
 767
 768   @param lu: the LU on behalf of which we make the check
 769   @param node: the node to check
 770   @param os_name: the OS to query about
 771   @param force_variant: whether to ignore variant errors
 772   @raise errors.OpPrereqError: if the node is not supporting the OS
 773
 774   """
 775   result = lu.rpc.call_os_get(node, os_name)
 776   result.Raise("OS '%s' not in supported OS list for node %s" %
 777                (os_name, node),
 778                prereq=True, ecode=errors.ECODE_INVAL)
 779   if not force_variant:
 780     _CheckOSVariant(result.payload, os_name)
 781
 782
 783 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
 784   """Ensure that a node has the given secondary ip.
 785
 786   @type lu: L{LogicalUnit}
 787   @param lu: the LU on behalf of which we make the check
 788   @type node: string
 789   @param node: the node to check
 790   @type secondary_ip: string
 791   @param secondary_ip: the ip to check
 792   @type prereq: boolean
 793   @param prereq: whether to throw a prerequisite or an execute error
 794   @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
 795   @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
 796
 797   """
 798   result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
 799   result.Raise("Failure checking secondary ip on node %s" % node,
 800                prereq=prereq, ecode=errors.ECODE_ENVIRON)
 801   if not result.payload:
 802     msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
 803            " please fix and re-run this command" % secondary_ip)
 804     if prereq:
 805       raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
 806     else:
 807       raise errors.OpExecError(msg)
 808
 809
 810 def _GetClusterDomainSecret():
 811   """Reads the cluster domain secret.
 812
 813   """
 814   return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
 815                                strict=True)
 816
 817
 818 def _CheckInstanceDown(lu, instance, reason):
 819   """Ensure that an instance is not running."""
 820   if instance.admin_up:
 821     raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
 822                                (instance.name, reason), errors.ECODE_STATE)
 823
 824   pnode = instance.primary_node
 825   ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
 826   ins_l.Raise("Can't contact node %s for instance information" % pnode,
 827               prereq=True, ecode=errors.ECODE_ENVIRON)
 828
 829   if instance.name in ins_l.payload:
 830     raise errors.OpPrereqError("Instance %s is running, %s" %
 831                                (instance.name, reason), errors.ECODE_STATE)
 832
 833
 834 def _ExpandItemName(fn, name, kind):
 835   """Expand an item name.
 836
 837   @param fn: the function to use for expansion
 838   @param name: requested item name
 839   @param kind: text description ('Node' or 'Instance')
 840   @return: the resolved (full) name
 841   @raise errors.OpPrereqError: if the item is not found
 842
 843   """
 844   full_name = fn(name)
 845   if full_name is None:
 846     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
 847                                errors.ECODE_NOENT)
 848   return full_name
 849
 850
 851 def _ExpandNodeName(cfg, name):
 852   """Wrapper over L{_ExpandItemName} for nodes."""
 853   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
 854
 855
 856 def _ExpandInstanceName(cfg, name):
 857   """Wrapper over L{_ExpandItemName} for instance."""
 858   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
 859
 860
 861 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 862                           memory, vcpus, nics, disk_template, disks,
 863                           bep, hvp, hypervisor_name):
 864   """Builds instance related env variables for hooks
 865
 866   This builds the hook environment from individual variables.
 867
 868   @type name: string
 869   @param name: the name of the instance
 870   @type primary_node: string
 871   @param primary_node: the name of the instance's primary node
 872   @type secondary_nodes: list
 873   @param secondary_nodes: list of secondary nodes as strings
 874   @type os_type: string
 875   @param os_type: the name of the instance's OS
 876   @type status: boolean
 877   @param status: the should_run status of the instance
 878   @type memory: string
 879   @param memory: the memory size of the instance
 880   @type vcpus: string
 881   @param vcpus: the count of VCPUs the instance has
 882   @type nics: list
 883   @param nics: list of tuples (ip, mac, mode, link) representing
 884       the NICs the instance has
 885   @type disk_template: string
 886   @param disk_template: the disk template of the instance
 887   @type disks: list
 888   @param disks: the list of (size, mode) pairs
 889   @type bep: dict
 890   @param bep: the backend parameters for the instance
 891   @type hvp: dict
 892   @param hvp: the hypervisor parameters for the instance
 893   @type hypervisor_name: string
 894   @param hypervisor_name: the hypervisor for the instance
 895   @rtype: dict
 896   @return: the hook environment for this instance
 897
 898   """
 899   if status:
 900     str_status = "up"
 901   else:
 902     str_status = "down"
 903   env = {
 904     "OP_TARGET": name,
 905     "INSTANCE_NAME": name,
 906     "INSTANCE_PRIMARY": primary_node,
 907     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
 908     "INSTANCE_OS_TYPE": os_type,
 909     "INSTANCE_STATUS": str_status,
 910     "INSTANCE_MEMORY": memory,
 911     "INSTANCE_VCPUS": vcpus,
 912     "INSTANCE_DISK_TEMPLATE": disk_template,
 913     "INSTANCE_HYPERVISOR": hypervisor_name,
 914   }
 915
 916   if nics:
 917     nic_count = len(nics)
 918     for idx, (ip, mac, mode, link) in enumerate(nics):
 919       if ip is None:
 920         ip = ""
 921       env["INSTANCE_NIC%d_IP" % idx] = ip
 922       env["INSTANCE_NIC%d_MAC" % idx] = mac
 923       env["INSTANCE_NIC%d_MODE" % idx] = mode
 924       env["INSTANCE_NIC%d_LINK" % idx] = link
 925       if mode == constants.NIC_MODE_BRIDGED:
 926         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
 927   else:
 928     nic_count = 0
 929
 930   env["INSTANCE_NIC_COUNT"] = nic_count
 931
 932   if disks:
 933     disk_count = len(disks)
 934     for idx, (size, mode) in enumerate(disks):
 935       env["INSTANCE_DISK%d_SIZE" % idx] = size
 936       env["INSTANCE_DISK%d_MODE" % idx] = mode
 937   else:
 938     disk_count = 0
 939
 940   env["INSTANCE_DISK_COUNT"] = disk_count
 941
 942   for source, kind in [(bep, "BE"), (hvp, "HV")]:
 943     for key, value in source.items():
 944       env["INSTANCE_%s_%s" % (kind, key)] = value
 945
 946   return env
 947
 948
 949 def _NICListToTuple(lu, nics):
 950   """Build a list of nic information tuples.
 951
 952   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
 953   value in LUInstanceQueryData.
 954
 955   @type lu:  L{LogicalUnit}
 956   @param lu: the logical unit on whose behalf we execute
 957   @type nics: list of L{objects.NIC}
 958   @param nics: list of nics to convert to hooks tuples
 959
 960   """
 961   hooks_nics = []
 962   cluster = lu.cfg.GetClusterInfo()
 963   for nic in nics:
 964     ip = nic.ip
 965     mac = nic.mac
 966     filled_params = cluster.SimpleFillNIC(nic.nicparams)
 967     mode = filled_params[constants.NIC_MODE]
 968     link = filled_params[constants.NIC_LINK]
 969     hooks_nics.append((ip, mac, mode, link))
 970   return hooks_nics
 971
 972
 973 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
 974   """Builds instance related env variables for hooks from an object.
 975
 976   @type lu: L{LogicalUnit}
 977   @param lu: the logical unit on whose behalf we execute
 978   @type instance: L{objects.Instance}
 979   @param instance: the instance for which we should build the
 980       environment
 981   @type override: dict
 982   @param override: dictionary with key/values that will override
 983       our values
 984   @rtype: dict
 985   @return: the hook environment dictionary
 986
 987   """
 988   cluster = lu.cfg.GetClusterInfo()
 989   bep = cluster.FillBE(instance)
 990   hvp = cluster.FillHV(instance)
 991   args = {
 992     'name': instance.name,
 993     'primary_node': instance.primary_node,
 994     'secondary_nodes': instance.secondary_nodes,
 995     'os_type': instance.os,
 996     'status': instance.admin_up,
 997     'memory': bep[constants.BE_MEMORY],
 998     'vcpus': bep[constants.BE_VCPUS],
 999     'nics': _NICListToTuple(lu, instance.nics),
1000     'disk_template': instance.disk_template,
1001     'disks': [(disk.size, disk.mode) for disk in instance.disks],
1002     'bep': bep,
1003     'hvp': hvp,
1004     'hypervisor_name': instance.hypervisor,
1005   }
1006   if override:
1007     args.update(override)
1008   return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1009
1010
1011 def _AdjustCandidatePool(lu, exceptions):
1012   """Adjust the candidate pool after node operations.
1013
1014   """
1015   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1016   if mod_list:
1017     lu.LogInfo("Promoted nodes to master candidate role: %s",
1018                utils.CommaJoin(node.name for node in mod_list))
1019     for name in mod_list:
1020       lu.context.ReaddNode(name)
1021   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1022   if mc_now > mc_max:
1023     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1024                (mc_now, mc_max))
1025
1026
1027 def _DecideSelfPromotion(lu, exceptions=None):
1028   """Decide whether I should promote myself as a master candidate.
1029
1030   """
1031   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1032   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1033   # the new node will increase mc_max with one, so:
1034   mc_should = min(mc_should + 1, cp_size)
1035   return mc_now < mc_should
1036
1037
1038 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1039   """Check that the brigdes needed by a list of nics exist.
1040
1041   """
1042   cluster = lu.cfg.GetClusterInfo()
1043   paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1044   brlist = [params[constants.NIC_LINK] for params in paramslist
1045             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1046   if brlist:
1047     result = lu.rpc.call_bridges_exist(target_node, brlist)
1048     result.Raise("Error checking bridges on destination node '%s'" %
1049                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1050
1051
1052 def _CheckInstanceBridgesExist(lu, instance, node=None):
1053   """Check that the brigdes needed by an instance exist.
1054
1055   """
1056   if node is None:
1057     node = instance.primary_node
1058   _CheckNicsBridgesExist(lu, instance.nics, node)
1059
1060
1061 def _CheckOSVariant(os_obj, name):
1062   """Check whether an OS name conforms to the os variants specification.
1063
1064   @type os_obj: L{objects.OS}
1065   @param os_obj: OS object to check
1066   @type name: string
1067   @param name: OS name passed by the user, to check for validity
1068
1069   """
1070   if not os_obj.supported_variants:
1071     return
1072   variant = objects.OS.GetVariant(name)
1073   if not variant:
1074     raise errors.OpPrereqError("OS name must include a variant",
1075                                errors.ECODE_INVAL)
1076
1077   if variant not in os_obj.supported_variants:
1078     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1079
1080
1081 def _GetNodeInstancesInner(cfg, fn):
1082   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1083
1084
1085 def _GetNodeInstances(cfg, node_name):
1086   """Returns a list of all primary and secondary instances on a node.
1087
1088   """
1089
1090   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1091
1092
1093 def _GetNodePrimaryInstances(cfg, node_name):
1094   """Returns primary instances on a node.
1095
1096   """
1097   return _GetNodeInstancesInner(cfg,
1098                                 lambda inst: node_name == inst.primary_node)
1099
1100
1101 def _GetNodeSecondaryInstances(cfg, node_name):
1102   """Returns secondary instances on a node.
1103
1104   """
1105   return _GetNodeInstancesInner(cfg,
1106                                 lambda inst: node_name in inst.secondary_nodes)
1107
1108
1109 def _GetStorageTypeArgs(cfg, storage_type):
1110   """Returns the arguments for a storage type.
1111
1112   """
1113   # Special case for file storage
1114   if storage_type == constants.ST_FILE:
1115     # storage.FileStorage wants a list of storage directories
1116     return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1117
1118   return []
1119
1120
1121 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1122   faulty = []
1123
1124   for dev in instance.disks:
1125     cfg.SetDiskID(dev, node_name)
1126
1127   result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1128   result.Raise("Failed to get disk status from node %s" % node_name,
1129                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1130
1131   for idx, bdev_status in enumerate(result.payload):
1132     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1133       faulty.append(idx)
1134
1135   return faulty
1136
1137
1138 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1139   """Check the sanity of iallocator and node arguments and use the
1140   cluster-wide iallocator if appropriate.
1141
1142   Check that at most one of (iallocator, node) is specified. If none is
1143   specified, then the LU's opcode's iallocator slot is filled with the
1144   cluster-wide default iallocator.
1145
1146   @type iallocator_slot: string
1147   @param iallocator_slot: the name of the opcode iallocator slot
1148   @type node_slot: string
1149   @param node_slot: the name of the opcode target node slot
1150
1151   """
1152   node = getattr(lu.op, node_slot, None)
1153   iallocator = getattr(lu.op, iallocator_slot, None)
1154
1155   if node is not None and iallocator is not None:
1156     raise errors.OpPrereqError("Do not specify both, iallocator and node.",
1157                                errors.ECODE_INVAL)
1158   elif node is None and iallocator is None:
1159     default_iallocator = lu.cfg.GetDefaultIAllocator()
1160     if default_iallocator:
1161       setattr(lu.op, iallocator_slot, default_iallocator)
1162     else:
1163       raise errors.OpPrereqError("No iallocator or node given and no"
1164                                  " cluster-wide default iallocator found."
1165                                  " Please specify either an iallocator or a"
1166                                  " node, or set a cluster-wide default"
1167                                  " iallocator.")
1168
1169
1170 class LUClusterPostInit(LogicalUnit):
1171   """Logical unit for running hooks after cluster initialization.
1172
1173   """
1174   HPATH = "cluster-init"
1175   HTYPE = constants.HTYPE_CLUSTER
1176
1177   def BuildHooksEnv(self):
1178     """Build hooks env.
1179
1180     """
1181     return {
1182       "OP_TARGET": self.cfg.GetClusterName(),
1183       }
1184
1185   def BuildHooksNodes(self):
1186     """Build hooks nodes.
1187
1188     """
1189     return ([], [self.cfg.GetMasterNode()])
1190
1191   def Exec(self, feedback_fn):
1192     """Nothing to do.
1193
1194     """
1195     return True
1196
1197
1198 class LUClusterDestroy(LogicalUnit):
1199   """Logical unit for destroying the cluster.
1200
1201   """
1202   HPATH = "cluster-destroy"
1203   HTYPE = constants.HTYPE_CLUSTER
1204
1205   def BuildHooksEnv(self):
1206     """Build hooks env.
1207
1208     """
1209     return {
1210       "OP_TARGET": self.cfg.GetClusterName(),
1211       }
1212
1213   def BuildHooksNodes(self):
1214     """Build hooks nodes.
1215
1216     """
1217     return ([], [])
1218
1219   def CheckPrereq(self):
1220     """Check prerequisites.
1221
1222     This checks whether the cluster is empty.
1223
1224     Any errors are signaled by raising errors.OpPrereqError.
1225
1226     """
1227     master = self.cfg.GetMasterNode()
1228
1229     nodelist = self.cfg.GetNodeList()
1230     if len(nodelist) != 1 or nodelist[0] != master:
1231       raise errors.OpPrereqError("There are still %d node(s) in"
1232                                  " this cluster." % (len(nodelist) - 1),
1233                                  errors.ECODE_INVAL)
1234     instancelist = self.cfg.GetInstanceList()
1235     if instancelist:
1236       raise errors.OpPrereqError("There are still %d instance(s) in"
1237                                  " this cluster." % len(instancelist),
1238                                  errors.ECODE_INVAL)
1239
1240   def Exec(self, feedback_fn):
1241     """Destroys the cluster.
1242
1243     """
1244     master = self.cfg.GetMasterNode()
1245
1246     # Run post hooks on master node before it's removed
1247     _RunPostHook(self, master)
1248
1249     result = self.rpc.call_node_stop_master(master, False)
1250     result.Raise("Could not disable the master role")
1251
1252     return master
1253
1254
1255 def _VerifyCertificate(filename):
1256   """Verifies a certificate for LUClusterVerify.
1257
1258   @type filename: string
1259   @param filename: Path to PEM file
1260
1261   """
1262   try:
1263     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1264                                            utils.ReadFile(filename))
1265   except Exception, err: # pylint: disable-msg=W0703
1266     return (LUClusterVerify.ETYPE_ERROR,
1267             "Failed to load X509 certificate %s: %s" % (filename, err))
1268
1269   (errcode, msg) = \
1270     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1271                                 constants.SSL_CERT_EXPIRATION_ERROR)
1272
1273   if msg:
1274     fnamemsg = "While verifying %s: %s" % (filename, msg)
1275   else:
1276     fnamemsg = None
1277
1278   if errcode is None:
1279     return (None, fnamemsg)
1280   elif errcode == utils.CERT_WARNING:
1281     return (LUClusterVerify.ETYPE_WARNING, fnamemsg)
1282   elif errcode == utils.CERT_ERROR:
1283     return (LUClusterVerify.ETYPE_ERROR, fnamemsg)
1284
1285   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1286
1287
1288 class LUClusterVerify(LogicalUnit):
1289   """Verifies the cluster status.
1290
1291   """
1292   HPATH = "cluster-verify"
1293   HTYPE = constants.HTYPE_CLUSTER
1294   REQ_BGL = False
1295
1296   TCLUSTER = "cluster"
1297   TNODE = "node"
1298   TINSTANCE = "instance"
1299
1300   ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1301   ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1302   ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1303   EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1304   EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1305   EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1306   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1307   EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1308   EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1309   EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1310   ENODEDRBD = (TNODE, "ENODEDRBD")
1311   ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1312   ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1313   ENODEHOOKS = (TNODE, "ENODEHOOKS")
1314   ENODEHV = (TNODE, "ENODEHV")
1315   ENODELVM = (TNODE, "ENODELVM")
1316   ENODEN1 = (TNODE, "ENODEN1")
1317   ENODENET = (TNODE, "ENODENET")
1318   ENODEOS = (TNODE, "ENODEOS")
1319   ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1320   ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1321   ENODERPC = (TNODE, "ENODERPC")
1322   ENODESSH = (TNODE, "ENODESSH")
1323   ENODEVERSION = (TNODE, "ENODEVERSION")
1324   ENODESETUP = (TNODE, "ENODESETUP")
1325   ENODETIME = (TNODE, "ENODETIME")
1326   ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1327
1328   ETYPE_FIELD = "code"
1329   ETYPE_ERROR = "ERROR"
1330   ETYPE_WARNING = "WARNING"
1331
1332   _HOOKS_INDENT_RE = re.compile("^", re.M)
1333
1334   class NodeImage(object):
1335     """A class representing the logical and physical status of a node.
1336
1337     @type name: string
1338     @ivar name: the node name to which this object refers
1339     @ivar volumes: a structure as returned from
1340         L{ganeti.backend.GetVolumeList} (runtime)
1341     @ivar instances: a list of running instances (runtime)
1342     @ivar pinst: list of configured primary instances (config)
1343     @ivar sinst: list of configured secondary instances (config)
1344     @ivar sbp: dictionary of {primary-node: list of instances} for all
1345         instances for which this node is secondary (config)
1346     @ivar mfree: free memory, as reported by hypervisor (runtime)
1347     @ivar dfree: free disk, as reported by the node (runtime)
1348     @ivar offline: the offline status (config)
1349     @type rpc_fail: boolean
1350     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1351         not whether the individual keys were correct) (runtime)
1352     @type lvm_fail: boolean
1353     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1354     @type hyp_fail: boolean
1355     @ivar hyp_fail: whether the RPC call didn't return the instance list
1356     @type ghost: boolean
1357     @ivar ghost: whether this is a known node or not (config)
1358     @type os_fail: boolean
1359     @ivar os_fail: whether the RPC call didn't return valid OS data
1360     @type oslist: list
1361     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1362     @type vm_capable: boolean
1363     @ivar vm_capable: whether the node can host instances
1364
1365     """
1366     def __init__(self, offline=False, name=None, vm_capable=True):
1367       self.name = name
1368       self.volumes = {}
1369       self.instances = []
1370       self.pinst = []
1371       self.sinst = []
1372       self.sbp = {}
1373       self.mfree = 0
1374       self.dfree = 0
1375       self.offline = offline
1376       self.vm_capable = vm_capable
1377       self.rpc_fail = False
1378       self.lvm_fail = False
1379       self.hyp_fail = False
1380       self.ghost = False
1381       self.os_fail = False
1382       self.oslist = {}
1383
1384   def ExpandNames(self):
1385     self.needed_locks = {
1386       locking.LEVEL_NODE: locking.ALL_SET,
1387       locking.LEVEL_INSTANCE: locking.ALL_SET,
1388     }
1389     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1390
1391   def _Error(self, ecode, item, msg, *args, **kwargs):
1392     """Format an error message.
1393
1394     Based on the opcode's error_codes parameter, either format a
1395     parseable error code, or a simpler error string.
1396
1397     This must be called only from Exec and functions called from Exec.
1398
1399     """
1400     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1401     itype, etxt = ecode
1402     # first complete the msg
1403     if args:
1404       msg = msg % args
1405     # then format the whole message
1406     if self.op.error_codes:
1407       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1408     else:
1409       if item:
1410         item = " " + item
1411       else:
1412         item = ""
1413       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1414     # and finally report it via the feedback_fn
1415     self._feedback_fn("  - %s" % msg)
1416
1417   def _ErrorIf(self, cond, *args, **kwargs):
1418     """Log an error message if the passed condition is True.
1419
1420     """
1421     cond = bool(cond) or self.op.debug_simulate_errors
1422     if cond:
1423       self._Error(*args, **kwargs)
1424     # do not mark the operation as failed for WARN cases only
1425     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1426       self.bad = self.bad or cond
1427
1428   def _VerifyNode(self, ninfo, nresult):
1429     """Perform some basic validation on data returned from a node.
1430
1431       - check the result data structure is well formed and has all the
1432         mandatory fields
1433       - check ganeti version
1434
1435     @type ninfo: L{objects.Node}
1436     @param ninfo: the node to check
1437     @param nresult: the results from the node
1438     @rtype: boolean
1439     @return: whether overall this call was successful (and we can expect
1440          reasonable values in the respose)
1441
1442     """
1443     node = ninfo.name
1444     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1445
1446     # main result, nresult should be a non-empty dict
1447     test = not nresult or not isinstance(nresult, dict)
1448     _ErrorIf(test, self.ENODERPC, node,
1449                   "unable to verify node: no data returned")
1450     if test:
1451       return False
1452
1453     # compares ganeti version
1454     local_version = constants.PROTOCOL_VERSION
1455     remote_version = nresult.get("version", None)
1456     test = not (remote_version and
1457                 isinstance(remote_version, (list, tuple)) and
1458                 len(remote_version) == 2)
1459     _ErrorIf(test, self.ENODERPC, node,
1460              "connection to node returned invalid data")
1461     if test:
1462       return False
1463
1464     test = local_version != remote_version[0]
1465     _ErrorIf(test, self.ENODEVERSION, node,
1466              "incompatible protocol versions: master %s,"
1467              " node %s", local_version, remote_version[0])
1468     if test:
1469       return False
1470
1471     # node seems compatible, we can actually try to look into its results
1472
1473     # full package version
1474     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1475                   self.ENODEVERSION, node,
1476                   "software version mismatch: master %s, node %s",
1477                   constants.RELEASE_VERSION, remote_version[1],
1478                   code=self.ETYPE_WARNING)
1479
1480     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1481     if ninfo.vm_capable and isinstance(hyp_result, dict):
1482       for hv_name, hv_result in hyp_result.iteritems():
1483         test = hv_result is not None
1484         _ErrorIf(test, self.ENODEHV, node,
1485                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1486
1487     hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1488     if ninfo.vm_capable and isinstance(hvp_result, list):
1489       for item, hv_name, hv_result in hvp_result:
1490         _ErrorIf(True, self.ENODEHV, node,
1491                  "hypervisor %s parameter verify failure (source %s): %s",
1492                  hv_name, item, hv_result)
1493
1494     test = nresult.get(constants.NV_NODESETUP,
1495                        ["Missing NODESETUP results"])
1496     _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1497              "; ".join(test))
1498
1499     return True
1500
1501   def _VerifyNodeTime(self, ninfo, nresult,
1502                       nvinfo_starttime, nvinfo_endtime):
1503     """Check the node time.
1504
1505     @type ninfo: L{objects.Node}
1506     @param ninfo: the node to check
1507     @param nresult: the remote results for the node
1508     @param nvinfo_starttime: the start time of the RPC call
1509     @param nvinfo_endtime: the end time of the RPC call
1510
1511     """
1512     node = ninfo.name
1513     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1514
1515     ntime = nresult.get(constants.NV_TIME, None)
1516     try:
1517       ntime_merged = utils.MergeTime(ntime)
1518     except (ValueError, TypeError):
1519       _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1520       return
1521
1522     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1523       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1524     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1525       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1526     else:
1527       ntime_diff = None
1528
1529     _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1530              "Node time diverges by at least %s from master node time",
1531              ntime_diff)
1532
1533   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1534     """Check the node time.
1535
1536     @type ninfo: L{objects.Node}
1537     @param ninfo: the node to check
1538     @param nresult: the remote results for the node
1539     @param vg_name: the configured VG name
1540
1541     """
1542     if vg_name is None:
1543       return
1544
1545     node = ninfo.name
1546     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1547
1548     # checks vg existence and size > 20G
1549     vglist = nresult.get(constants.NV_VGLIST, None)
1550     test = not vglist
1551     _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1552     if not test:
1553       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1554                                             constants.MIN_VG_SIZE)
1555       _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1556
1557     # check pv names
1558     pvlist = nresult.get(constants.NV_PVLIST, None)
1559     test = pvlist is None
1560     _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1561     if not test:
1562       # check that ':' is not present in PV names, since it's a
1563       # special character for lvcreate (denotes the range of PEs to
1564       # use on the PV)
1565       for _, pvname, owner_vg in pvlist:
1566         test = ":" in pvname
1567         _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1568                  " '%s' of VG '%s'", pvname, owner_vg)
1569
1570   def _VerifyNodeNetwork(self, ninfo, nresult):
1571     """Check the node time.
1572
1573     @type ninfo: L{objects.Node}
1574     @param ninfo: the node to check
1575     @param nresult: the remote results for the node
1576
1577     """
1578     node = ninfo.name
1579     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1580
1581     test = constants.NV_NODELIST not in nresult
1582     _ErrorIf(test, self.ENODESSH, node,
1583              "node hasn't returned node ssh connectivity data")
1584     if not test:
1585       if nresult[constants.NV_NODELIST]:
1586         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1587           _ErrorIf(True, self.ENODESSH, node,
1588                    "ssh communication with node '%s': %s", a_node, a_msg)
1589
1590     test = constants.NV_NODENETTEST not in nresult
1591     _ErrorIf(test, self.ENODENET, node,
1592              "node hasn't returned node tcp connectivity data")
1593     if not test:
1594       if nresult[constants.NV_NODENETTEST]:
1595         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1596         for anode in nlist:
1597           _ErrorIf(True, self.ENODENET, node,
1598                    "tcp communication with node '%s': %s",
1599                    anode, nresult[constants.NV_NODENETTEST][anode])
1600
1601     test = constants.NV_MASTERIP not in nresult
1602     _ErrorIf(test, self.ENODENET, node,
1603              "node hasn't returned node master IP reachability data")
1604     if not test:
1605       if not nresult[constants.NV_MASTERIP]:
1606         if node == self.master_node:
1607           msg = "the master node cannot reach the master IP (not configured?)"
1608         else:
1609           msg = "cannot reach the master IP"
1610         _ErrorIf(True, self.ENODENET, node, msg)
1611
1612   def _VerifyInstance(self, instance, instanceconfig, node_image,
1613                       diskstatus):
1614     """Verify an instance.
1615
1616     This function checks to see if the required block devices are
1617     available on the instance's node.
1618
1619     """
1620     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1621     node_current = instanceconfig.primary_node
1622
1623     node_vol_should = {}
1624     instanceconfig.MapLVsByNode(node_vol_should)
1625
1626     for node in node_vol_should:
1627       n_img = node_image[node]
1628       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1629         # ignore missing volumes on offline or broken nodes
1630         continue
1631       for volume in node_vol_should[node]:
1632         test = volume not in n_img.volumes
1633         _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1634                  "volume %s missing on node %s", volume, node)
1635
1636     if instanceconfig.admin_up:
1637       pri_img = node_image[node_current]
1638       test = instance not in pri_img.instances and not pri_img.offline
1639       _ErrorIf(test, self.EINSTANCEDOWN, instance,
1640                "instance not running on its primary node %s",
1641                node_current)
1642
1643     for node, n_img in node_image.items():
1644       if node != node_current:
1645         test = instance in n_img.instances
1646         _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1647                  "instance should not run on node %s", node)
1648
1649     diskdata = [(nname, success, status, idx)
1650                 for (nname, disks) in diskstatus.items()
1651                 for idx, (success, status) in enumerate(disks)]
1652
1653     for nname, success, bdev_status, idx in diskdata:
1654       # the 'ghost node' construction in Exec() ensures that we have a
1655       # node here
1656       snode = node_image[nname]
1657       bad_snode = snode.ghost or snode.offline
1658       _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
1659                self.EINSTANCEFAULTYDISK, instance,
1660                "couldn't retrieve status for disk/%s on %s: %s",
1661                idx, nname, bdev_status)
1662       _ErrorIf((instanceconfig.admin_up and success and
1663                 bdev_status.ldisk_status == constants.LDS_FAULTY),
1664                self.EINSTANCEFAULTYDISK, instance,
1665                "disk/%s on %s is faulty", idx, nname)
1666
1667   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1668     """Verify if there are any unknown volumes in the cluster.
1669
1670     The .os, .swap and backup volumes are ignored. All other volumes are
1671     reported as unknown.
1672
1673     @type reserved: L{ganeti.utils.FieldSet}
1674     @param reserved: a FieldSet of reserved volume names
1675
1676     """
1677     for node, n_img in node_image.items():
1678       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1679         # skip non-healthy nodes
1680         continue
1681       for volume in n_img.volumes:
1682         test = ((node not in node_vol_should or
1683                 volume not in node_vol_should[node]) and
1684                 not reserved.Matches(volume))
1685         self._ErrorIf(test, self.ENODEORPHANLV, node,
1686                       "volume %s is unknown", volume)
1687
1688   def _VerifyOrphanInstances(self, instancelist, node_image):
1689     """Verify the list of running instances.
1690
1691     This checks what instances are running but unknown to the cluster.
1692
1693     """
1694     for node, n_img in node_image.items():
1695       for o_inst in n_img.instances:
1696         test = o_inst not in instancelist
1697         self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1698                       "instance %s on node %s should not exist", o_inst, node)
1699
1700   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1701     """Verify N+1 Memory Resilience.
1702
1703     Check that if one single node dies we can still start all the
1704     instances it was primary for.
1705
1706     """
1707     cluster_info = self.cfg.GetClusterInfo()
1708     for node, n_img in node_image.items():
1709       # This code checks that every node which is now listed as
1710       # secondary has enough memory to host all instances it is
1711       # supposed to should a single other node in the cluster fail.
1712       # FIXME: not ready for failover to an arbitrary node
1713       # FIXME: does not support file-backed instances
1714       # WARNING: we currently take into account down instances as well
1715       # as up ones, considering that even if they're down someone
1716       # might want to start them even in the event of a node failure.
1717       if n_img.offline:
1718         # we're skipping offline nodes from the N+1 warning, since
1719         # most likely we don't have good memory infromation from them;
1720         # we already list instances living on such nodes, and that's
1721         # enough warning
1722         continue
1723       for prinode, instances in n_img.sbp.items():
1724         needed_mem = 0
1725         for instance in instances:
1726           bep = cluster_info.FillBE(instance_cfg[instance])
1727           if bep[constants.BE_AUTO_BALANCE]:
1728             needed_mem += bep[constants.BE_MEMORY]
1729         test = n_img.mfree < needed_mem
1730         self._ErrorIf(test, self.ENODEN1, node,
1731                       "not enough memory to accomodate instance failovers"
1732                       " should node %s fail (%dMiB needed, %dMiB available)",
1733                       prinode, needed_mem, n_img.mfree)
1734
1735   @classmethod
1736   def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
1737                    (files_all, files_all_opt, files_mc, files_vm)):
1738     """Verifies file checksums collected from all nodes.
1739
1740     @param errorif: Callback for reporting errors
1741     @param nodeinfo: List of L{objects.Node} objects
1742     @param master_node: Name of master node
1743     @param all_nvinfo: RPC results
1744
1745     """
1746     node_names = frozenset(node.name for node in nodeinfo)
1747
1748     assert master_node in node_names
1749     assert (len(files_all | files_all_opt | files_mc | files_vm) ==
1750             sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
1751            "Found file listed in more than one file list"
1752
1753     # Define functions determining which nodes to consider for a file
1754     file2nodefn = dict([(filename, fn)
1755       for (files, fn) in [(files_all, None),
1756                           (files_all_opt, None),
1757                           (files_mc, lambda node: (node.master_candidate or
1758                                                    node.name == master_node)),
1759                           (files_vm, lambda node: node.vm_capable)]
1760       for filename in files])
1761
1762     fileinfo = dict((filename, {}) for filename in file2nodefn.keys())
1763
1764     for node in nodeinfo:
1765       nresult = all_nvinfo[node.name]
1766
1767       if nresult.fail_msg or not nresult.payload:
1768         node_files = None
1769       else:
1770         node_files = nresult.payload.get(constants.NV_FILELIST, None)
1771
1772       test = not (node_files and isinstance(node_files, dict))
1773       errorif(test, cls.ENODEFILECHECK, node.name,
1774               "Node did not return file checksum data")
1775       if test:
1776         continue
1777
1778       for (filename, checksum) in node_files.items():
1779         # Check if the file should be considered for a node
1780         fn = file2nodefn[filename]
1781         if fn is None or fn(node):
1782           fileinfo[filename].setdefault(checksum, set()).add(node.name)
1783
1784     for (filename, checksums) in fileinfo.items():
1785       assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
1786
1787       # Nodes having the file
1788       with_file = frozenset(node_name
1789                             for nodes in fileinfo[filename].values()
1790                             for node_name in nodes)
1791
1792       # Nodes missing file
1793       missing_file = node_names - with_file
1794
1795       if filename in files_all_opt:
1796         # All or no nodes
1797         errorif(missing_file and missing_file != node_names,
1798                 cls.ECLUSTERFILECHECK, None,
1799                 "File %s is optional, but it must exist on all or no nodes (not"
1800                 " found on %s)",
1801                 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
1802       else:
1803         errorif(missing_file, cls.ECLUSTERFILECHECK, None,
1804                 "File %s is missing from node(s) %s", filename,
1805                 utils.CommaJoin(utils.NiceSort(missing_file)))
1806
1807       # See if there are multiple versions of the file
1808       test = len(checksums) > 1
1809       if test:
1810         variants = ["variant %s on %s" %
1811                     (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
1812                     for (idx, (checksum, nodes)) in
1813                       enumerate(sorted(checksums.items()))]
1814       else:
1815         variants = []
1816
1817       errorif(test, cls.ECLUSTERFILECHECK, None,
1818               "File %s found with %s different checksums (%s)",
1819               filename, len(checksums), "; ".join(variants))
1820
1821   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1822                       drbd_map):
1823     """Verifies and the node DRBD status.
1824
1825     @type ninfo: L{objects.Node}
1826     @param ninfo: the node to check
1827     @param nresult: the remote results for the node
1828     @param instanceinfo: the dict of instances
1829     @param drbd_helper: the configured DRBD usermode helper
1830     @param drbd_map: the DRBD map as returned by
1831         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1832
1833     """
1834     node = ninfo.name
1835     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1836
1837     if drbd_helper:
1838       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1839       test = (helper_result == None)
1840       _ErrorIf(test, self.ENODEDRBDHELPER, node,
1841                "no drbd usermode helper returned")
1842       if helper_result:
1843         status, payload = helper_result
1844         test = not status
1845         _ErrorIf(test, self.ENODEDRBDHELPER, node,
1846                  "drbd usermode helper check unsuccessful: %s", payload)
1847         test = status and (payload != drbd_helper)
1848         _ErrorIf(test, self.ENODEDRBDHELPER, node,
1849                  "wrong drbd usermode helper: %s", payload)
1850
1851     # compute the DRBD minors
1852     node_drbd = {}
1853     for minor, instance in drbd_map[node].items():
1854       test = instance not in instanceinfo
1855       _ErrorIf(test, self.ECLUSTERCFG, None,
1856                "ghost instance '%s' in temporary DRBD map", instance)
1857         # ghost instance should not be running, but otherwise we
1858         # don't give double warnings (both ghost instance and
1859         # unallocated minor in use)
1860       if test:
1861         node_drbd[minor] = (instance, False)
1862       else:
1863         instance = instanceinfo[instance]
1864         node_drbd[minor] = (instance.name, instance.admin_up)
1865
1866     # and now check them
1867     used_minors = nresult.get(constants.NV_DRBDLIST, [])
1868     test = not isinstance(used_minors, (tuple, list))
1869     _ErrorIf(test, self.ENODEDRBD, node,
1870              "cannot parse drbd status file: %s", str(used_minors))
1871     if test:
1872       # we cannot check drbd status
1873       return
1874
1875     for minor, (iname, must_exist) in node_drbd.items():
1876       test = minor not in used_minors and must_exist
1877       _ErrorIf(test, self.ENODEDRBD, node,
1878                "drbd minor %d of instance %s is not active", minor, iname)
1879     for minor in used_minors:
1880       test = minor not in node_drbd
1881       _ErrorIf(test, self.ENODEDRBD, node,
1882                "unallocated drbd minor %d is in use", minor)
1883
1884   def _UpdateNodeOS(self, ninfo, nresult, nimg):
1885     """Builds the node OS structures.
1886
1887     @type ninfo: L{objects.Node}
1888     @param ninfo: the node to check
1889     @param nresult: the remote results for the node
1890     @param nimg: the node image object
1891
1892     """
1893     node = ninfo.name
1894     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1895
1896     remote_os = nresult.get(constants.NV_OSLIST, None)
1897     test = (not isinstance(remote_os, list) or
1898             not compat.all(isinstance(v, list) and len(v) == 7
1899                            for v in remote_os))
1900
1901     _ErrorIf(test, self.ENODEOS, node,
1902              "node hasn't returned valid OS data")
1903
1904     nimg.os_fail = test
1905
1906     if test:
1907       return
1908
1909     os_dict = {}
1910
1911     for (name, os_path, status, diagnose,
1912          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1913
1914       if name not in os_dict:
1915         os_dict[name] = []
1916
1917       # parameters is a list of lists instead of list of tuples due to
1918       # JSON lacking a real tuple type, fix it:
1919       parameters = [tuple(v) for v in parameters]
1920       os_dict[name].append((os_path, status, diagnose,
1921                             set(variants), set(parameters), set(api_ver)))
1922
1923     nimg.oslist = os_dict
1924
1925   def _VerifyNodeOS(self, ninfo, nimg, base):
1926     """Verifies the node OS list.
1927
1928     @type ninfo: L{objects.Node}
1929     @param ninfo: the node to check
1930     @param nimg: the node image object
1931     @param base: the 'template' node we match against (e.g. from the master)
1932
1933     """
1934     node = ninfo.name
1935     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1936
1937     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1938
1939     beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
1940     for os_name, os_data in nimg.oslist.items():
1941       assert os_data, "Empty OS status for OS %s?!" % os_name
1942       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1943       _ErrorIf(not f_status, self.ENODEOS, node,
1944                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1945       _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1946                "OS '%s' has multiple entries (first one shadows the rest): %s",
1947                os_name, utils.CommaJoin([v[0] for v in os_data]))
1948       # this will catched in backend too
1949       _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1950                and not f_var, self.ENODEOS, node,
1951                "OS %s with API at least %d does not declare any variant",
1952                os_name, constants.OS_API_V15)
1953       # comparisons with the 'base' image
1954       test = os_name not in base.oslist
1955       _ErrorIf(test, self.ENODEOS, node,
1956                "Extra OS %s not present on reference node (%s)",
1957                os_name, base.name)
1958       if test:
1959         continue
1960       assert base.oslist[os_name], "Base node has empty OS status?"
1961       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1962       if not b_status:
1963         # base OS is invalid, skipping
1964         continue
1965       for kind, a, b in [("API version", f_api, b_api),
1966                          ("variants list", f_var, b_var),
1967                          ("parameters", beautify_params(f_param),
1968                           beautify_params(b_param))]:
1969         _ErrorIf(a != b, self.ENODEOS, node,
1970                  "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
1971                  kind, os_name, base.name,
1972                  utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
1973
1974     # check any missing OSes
1975     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1976     _ErrorIf(missing, self.ENODEOS, node,
1977              "OSes present on reference node %s but missing on this node: %s",
1978              base.name, utils.CommaJoin(missing))
1979
1980   def _VerifyOob(self, ninfo, nresult):
1981     """Verifies out of band functionality of a node.
1982
1983     @type ninfo: L{objects.Node}
1984     @param ninfo: the node to check
1985     @param nresult: the remote results for the node
1986
1987     """
1988     node = ninfo.name
1989     # We just have to verify the paths on master and/or master candidates
1990     # as the oob helper is invoked on the master
1991     if ((ninfo.master_candidate or ninfo.master_capable) and
1992         constants.NV_OOB_PATHS in nresult):
1993       for path_result in nresult[constants.NV_OOB_PATHS]:
1994         self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
1995
1996   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1997     """Verifies and updates the node volume data.
1998
1999     This function will update a L{NodeImage}'s internal structures
2000     with data from the remote call.
2001
2002     @type ninfo: L{objects.Node}
2003     @param ninfo: the node to check
2004     @param nresult: the remote results for the node
2005     @param nimg: the node image object
2006     @param vg_name: the configured VG name
2007
2008     """
2009     node = ninfo.name
2010     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2011
2012     nimg.lvm_fail = True
2013     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2014     if vg_name is None:
2015       pass
2016     elif isinstance(lvdata, basestring):
2017       _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2018                utils.SafeEncode(lvdata))
2019     elif not isinstance(lvdata, dict):
2020       _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2021     else:
2022       nimg.volumes = lvdata
2023       nimg.lvm_fail = False
2024
2025   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2026     """Verifies and updates the node instance list.
2027
2028     If the listing was successful, then updates this node's instance
2029     list. Otherwise, it marks the RPC call as failed for the instance
2030     list key.
2031
2032     @type ninfo: L{objects.Node}
2033     @param ninfo: the node to check
2034     @param nresult: the remote results for the node
2035     @param nimg: the node image object
2036
2037     """
2038     idata = nresult.get(constants.NV_INSTANCELIST, None)
2039     test = not isinstance(idata, list)
2040     self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2041                   " (instancelist): %s", utils.SafeEncode(str(idata)))
2042     if test:
2043       nimg.hyp_fail = True
2044     else:
2045       nimg.instances = idata
2046
2047   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2048     """Verifies and computes a node information map
2049
2050     @type ninfo: L{objects.Node}
2051     @param ninfo: the node to check
2052     @param nresult: the remote results for the node
2053     @param nimg: the node image object
2054     @param vg_name: the configured VG name
2055
2056     """
2057     node = ninfo.name
2058     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2059
2060     # try to read free memory (from the hypervisor)
2061     hv_info = nresult.get(constants.NV_HVINFO, None)
2062     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2063     _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2064     if not test:
2065       try:
2066         nimg.mfree = int(hv_info["memory_free"])
2067       except (ValueError, TypeError):
2068         _ErrorIf(True, self.ENODERPC, node,
2069                  "node returned invalid nodeinfo, check hypervisor")
2070
2071     # FIXME: devise a free space model for file based instances as well
2072     if vg_name is not None:
2073       test = (constants.NV_VGLIST not in nresult or
2074               vg_name not in nresult[constants.NV_VGLIST])
2075       _ErrorIf(test, self.ENODELVM, node,
2076                "node didn't return data for the volume group '%s'"
2077                " - it is either missing or broken", vg_name)
2078       if not test:
2079         try:
2080           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2081         except (ValueError, TypeError):
2082           _ErrorIf(True, self.ENODERPC, node,
2083                    "node returned invalid LVM info, check LVM status")
2084
2085   def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2086     """Gets per-disk status information for all instances.
2087
2088     @type nodelist: list of strings
2089     @param nodelist: Node names
2090     @type node_image: dict of (name, L{objects.Node})
2091     @param node_image: Node objects
2092     @type instanceinfo: dict of (name, L{objects.Instance})
2093     @param instanceinfo: Instance objects
2094     @rtype: {instance: {node: [(succes, payload)]}}
2095     @return: a dictionary of per-instance dictionaries with nodes as
2096         keys and disk information as values; the disk information is a
2097         list of tuples (success, payload)
2098
2099     """
2100     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2101
2102     node_disks = {}
2103     node_disks_devonly = {}
2104     diskless_instances = set()
2105     diskless = constants.DT_DISKLESS
2106
2107     for nname in nodelist:
2108       node_instances = list(itertools.chain(node_image[nname].pinst,
2109                                             node_image[nname].sinst))
2110       diskless_instances.update(inst for inst in node_instances
2111                                 if instanceinfo[inst].disk_template == diskless)
2112       disks = [(inst, disk)
2113                for inst in node_instances
2114                for disk in instanceinfo[inst].disks]
2115
2116       if not disks:
2117         # No need to collect data
2118         continue
2119
2120       node_disks[nname] = disks
2121
2122       # Creating copies as SetDiskID below will modify the objects and that can
2123       # lead to incorrect data returned from nodes
2124       devonly = [dev.Copy() for (_, dev) in disks]
2125
2126       for dev in devonly:
2127         self.cfg.SetDiskID(dev, nname)
2128
2129       node_disks_devonly[nname] = devonly
2130
2131     assert len(node_disks) == len(node_disks_devonly)
2132
2133     # Collect data from all nodes with disks
2134     result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2135                                                           node_disks_devonly)
2136
2137     assert len(result) == len(node_disks)
2138
2139     instdisk = {}
2140
2141     for (nname, nres) in result.items():
2142       disks = node_disks[nname]
2143
2144       if nres.offline:
2145         # No data from this node
2146         data = len(disks) * [(False, "node offline")]
2147       else:
2148         msg = nres.fail_msg
2149         _ErrorIf(msg, self.ENODERPC, nname,
2150                  "while getting disk information: %s", msg)
2151         if msg:
2152           # No data from this node
2153           data = len(disks) * [(False, msg)]
2154         else:
2155           data = []
2156           for idx, i in enumerate(nres.payload):
2157             if isinstance(i, (tuple, list)) and len(i) == 2:
2158               data.append(i)
2159             else:
2160               logging.warning("Invalid result from node %s, entry %d: %s",
2161                               nname, idx, i)
2162               data.append((False, "Invalid result from the remote node"))
2163
2164       for ((inst, _), status) in zip(disks, data):
2165         instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2166
2167     # Add empty entries for diskless instances.
2168     for inst in diskless_instances:
2169       assert inst not in instdisk
2170       instdisk[inst] = {}
2171
2172     assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2173                       len(nnames) <= len(instanceinfo[inst].all_nodes) and
2174                       compat.all(isinstance(s, (tuple, list)) and
2175                                  len(s) == 2 for s in statuses)
2176                       for inst, nnames in instdisk.items()
2177                       for nname, statuses in nnames.items())
2178     assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2179
2180     return instdisk
2181
2182   def _VerifyHVP(self, hvp_data):
2183     """Verifies locally the syntax of the hypervisor parameters.
2184
2185     """
2186     for item, hv_name, hv_params in hvp_data:
2187       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2188              (item, hv_name))
2189       try:
2190         hv_class = hypervisor.GetHypervisor(hv_name)
2191         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2192         hv_class.CheckParameterSyntax(hv_params)
2193       except errors.GenericError, err:
2194         self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
2195
2196   def BuildHooksEnv(self):
2197     """Build hooks env.
2198
2199     Cluster-Verify hooks just ran in the post phase and their failure makes
2200     the output be logged in the verify output and the verification to fail.
2201
2202     """
2203     cfg = self.cfg
2204
2205     env = {
2206       "CLUSTER_TAGS": " ".join(cfg.GetClusterInfo().GetTags())
2207       }
2208
2209     env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2210                for node in cfg.GetAllNodesInfo().values())
2211
2212     return env
2213
2214   def BuildHooksNodes(self):
2215     """Build hooks nodes.
2216
2217     """
2218     return ([], self.cfg.GetNodeList())
2219
2220   def Exec(self, feedback_fn):
2221     """Verify integrity of cluster, performing various test on nodes.
2222
2223     """
2224     # This method has too many local variables. pylint: disable-msg=R0914
2225     self.bad = False
2226     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2227     verbose = self.op.verbose
2228     self._feedback_fn = feedback_fn
2229     feedback_fn("* Verifying global settings")
2230     for msg in self.cfg.VerifyConfig():
2231       _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2232
2233     # Check the cluster certificates
2234     for cert_filename in constants.ALL_CERT_FILES:
2235       (errcode, msg) = _VerifyCertificate(cert_filename)
2236       _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2237
2238     vg_name = self.cfg.GetVGName()
2239     drbd_helper = self.cfg.GetDRBDHelper()
2240     hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2241     cluster = self.cfg.GetClusterInfo()
2242     nodelist = utils.NiceSort(self.cfg.GetNodeList())
2243     nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2244     nodeinfo_byname = dict(zip(nodelist, nodeinfo))
2245     instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2246     instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2247                         for iname in instancelist)
2248     groupinfo = self.cfg.GetAllNodeGroupsInfo()
2249     i_non_redundant = [] # Non redundant instances
2250     i_non_a_balanced = [] # Non auto-balanced instances
2251     n_offline = 0 # Count of offline nodes
2252     n_drained = 0 # Count of nodes being drained
2253     node_vol_should = {}
2254
2255     # FIXME: verify OS list
2256
2257     # File verification
2258     filemap = _ComputeAncillaryFiles(cluster, False)
2259
2260     # do local checksums
2261     master_node = self.master_node = self.cfg.GetMasterNode()
2262     master_ip = self.cfg.GetMasterIP()
2263
2264     # Compute the set of hypervisor parameters
2265     hvp_data = []
2266     for hv_name in hypervisors:
2267       hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
2268     for os_name, os_hvp in cluster.os_hvp.items():
2269       for hv_name, hv_params in os_hvp.items():
2270         if not hv_params:
2271           continue
2272         full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
2273         hvp_data.append(("os %s" % os_name, hv_name, full_params))
2274     # TODO: collapse identical parameter values in a single one
2275     for instance in instanceinfo.values():
2276       if not instance.hvparams:
2277         continue
2278       hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
2279                        cluster.FillHV(instance)))
2280     # and verify them locally
2281     self._VerifyHVP(hvp_data)
2282
2283     feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2284     node_verify_param = {
2285       constants.NV_FILELIST:
2286         utils.UniqueSequence(filename
2287                              for files in filemap
2288                              for filename in files),
2289       constants.NV_NODELIST: [node.name for node in nodeinfo
2290                               if not node.offline],
2291       constants.NV_HYPERVISOR: hypervisors,
2292       constants.NV_HVPARAMS: hvp_data,
2293       constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2294                                   node.secondary_ip) for node in nodeinfo
2295                                  if not node.offline],
2296       constants.NV_INSTANCELIST: hypervisors,
2297       constants.NV_VERSION: None,
2298       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2299       constants.NV_NODESETUP: None,
2300       constants.NV_TIME: None,
2301       constants.NV_MASTERIP: (master_node, master_ip),
2302       constants.NV_OSLIST: None,
2303       constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2304       }
2305
2306     if vg_name is not None:
2307       node_verify_param[constants.NV_VGLIST] = None
2308       node_verify_param[constants.NV_LVLIST] = vg_name
2309       node_verify_param[constants.NV_PVLIST] = [vg_name]
2310       node_verify_param[constants.NV_DRBDLIST] = None
2311
2312     if drbd_helper:
2313       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2314
2315     # Build our expected cluster state
2316     node_image = dict((node.name, self.NodeImage(offline=node.offline,
2317                                                  name=node.name,
2318                                                  vm_capable=node.vm_capable))
2319                       for node in nodeinfo)
2320
2321     # Gather OOB paths
2322     oob_paths = []
2323     for node in nodeinfo:
2324       path = _SupportsOob(self.cfg, node)
2325       if path and path not in oob_paths:
2326         oob_paths.append(path)
2327
2328     if oob_paths:
2329       node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2330
2331     for instance in instancelist:
2332       inst_config = instanceinfo[instance]
2333
2334       for nname in inst_config.all_nodes:
2335         if nname not in node_image:
2336           # ghost node
2337           gnode = self.NodeImage(name=nname)
2338           gnode.ghost = True
2339           node_image[nname] = gnode
2340
2341       inst_config.MapLVsByNode(node_vol_should)
2342
2343       pnode = inst_config.primary_node
2344       node_image[pnode].pinst.append(instance)
2345
2346       for snode in inst_config.secondary_nodes:
2347         nimg = node_image[snode]
2348         nimg.sinst.append(instance)
2349         if pnode not in nimg.sbp:
2350           nimg.sbp[pnode] = []
2351         nimg.sbp[pnode].append(instance)
2352
2353     # At this point, we have the in-memory data structures complete,
2354     # except for the runtime information, which we'll gather next
2355
2356     # Due to the way our RPC system works, exact response times cannot be
2357     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2358     # time before and after executing the request, we can at least have a time
2359     # window.
2360     nvinfo_starttime = time.time()
2361     all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2362                                            self.cfg.GetClusterName())
2363     nvinfo_endtime = time.time()
2364
2365     all_drbd_map = self.cfg.ComputeDRBDMap()
2366
2367     feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
2368     instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)
2369
2370     feedback_fn("* Verifying configuration file consistency")
2371     self._VerifyFiles(_ErrorIf, nodeinfo, master_node, all_nvinfo, filemap)
2372
2373     feedback_fn("* Verifying node status")
2374
2375     refos_img = None
2376
2377     for node_i in nodeinfo:
2378       node = node_i.name
2379       nimg = node_image[node]
2380
2381       if node_i.offline:
2382         if verbose:
2383           feedback_fn("* Skipping offline node %s" % (node,))
2384         n_offline += 1
2385         continue
2386
2387       if node == master_node:
2388         ntype = "master"
2389       elif node_i.master_candidate:
2390         ntype = "master candidate"
2391       elif node_i.drained:
2392         ntype = "drained"
2393         n_drained += 1
2394       else:
2395         ntype = "regular"
2396       if verbose:
2397         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2398
2399       msg = all_nvinfo[node].fail_msg
2400       _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2401       if msg:
2402         nimg.rpc_fail = True
2403         continue
2404
2405       nresult = all_nvinfo[node].payload
2406
2407       nimg.call_ok = self._VerifyNode(node_i, nresult)
2408       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2409       self._VerifyNodeNetwork(node_i, nresult)
2410       self._VerifyOob(node_i, nresult)
2411
2412       if nimg.vm_capable:
2413         self._VerifyNodeLVM(node_i, nresult, vg_name)
2414         self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2415                              all_drbd_map)
2416
2417         self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2418         self._UpdateNodeInstances(node_i, nresult, nimg)
2419         self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2420         self._UpdateNodeOS(node_i, nresult, nimg)
2421         if not nimg.os_fail:
2422           if refos_img is None:
2423             refos_img = nimg
2424           self._VerifyNodeOS(node_i, nimg, refos_img)
2425
2426     feedback_fn("* Verifying instance status")
2427     for instance in instancelist:
2428       if verbose:
2429         feedback_fn("* Verifying instance %s" % instance)
2430       inst_config = instanceinfo[instance]
2431       self._VerifyInstance(instance, inst_config, node_image,
2432                            instdisk[instance])
2433       inst_nodes_offline = []
2434
2435       pnode = inst_config.primary_node
2436       pnode_img = node_image[pnode]
2437       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2438                self.ENODERPC, pnode, "instance %s, connection to"
2439                " primary node failed", instance)
2440
2441       _ErrorIf(inst_config.admin_up and pnode_img.offline,
2442                self.EINSTANCEBADNODE, instance,
2443                "instance is marked as running and lives on offline node %s",
2444                inst_config.primary_node)
2445
2446       # If the instance is non-redundant we cannot survive losing its primary
2447       # node, so we are not N+1 compliant. On the other hand we have no disk
2448       # templates with more than one secondary so that situation is not well
2449       # supported either.
2450       # FIXME: does not support file-backed instances
2451       if not inst_config.secondary_nodes:
2452         i_non_redundant.append(instance)
2453
2454       _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2455                instance, "instance has multiple secondary nodes: %s",
2456                utils.CommaJoin(inst_config.secondary_nodes),
2457                code=self.ETYPE_WARNING)
2458
2459       if inst_config.disk_template in constants.DTS_INT_MIRROR:
2460         pnode = inst_config.primary_node
2461         instance_nodes = utils.NiceSort(inst_config.all_nodes)
2462         instance_groups = {}
2463
2464         for node in instance_nodes:
2465           instance_groups.setdefault(nodeinfo_byname[node].group,
2466                                      []).append(node)
2467
2468         pretty_list = [
2469           "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2470           # Sort so that we always list the primary node first.
2471           for group, nodes in sorted(instance_groups.items(),
2472                                      key=lambda (_, nodes): pnode in nodes,
2473                                      reverse=True)]
2474
2475         self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2476                       instance, "instance has primary and secondary nodes in"
2477                       " different groups: %s", utils.CommaJoin(pretty_list),
2478                       code=self.ETYPE_WARNING)
2479
2480       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2481         i_non_a_balanced.append(instance)
2482
2483       for snode in inst_config.secondary_nodes:
2484         s_img = node_image[snode]
2485         _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2486                  "instance %s, connection to secondary node failed", instance)
2487
2488         if s_img.offline:
2489           inst_nodes_offline.append(snode)
2490
2491       # warn that the instance lives on offline nodes
2492       _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2493                "instance has offline secondary node(s) %s",
2494                utils.CommaJoin(inst_nodes_offline))
2495       # ... or ghost/non-vm_capable nodes
2496       for node in inst_config.all_nodes:
2497         _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2498                  "instance lives on ghost node %s", node)
2499         _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2500                  instance, "instance lives on non-vm_capable node %s", node)
2501
2502     feedback_fn("* Verifying orphan volumes")
2503     reserved = utils.FieldSet(*cluster.reserved_lvs)
2504     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2505
2506     feedback_fn("* Verifying orphan instances")
2507     self._VerifyOrphanInstances(instancelist, node_image)
2508
2509     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2510       feedback_fn("* Verifying N+1 Memory redundancy")
2511       self._VerifyNPlusOneMemory(node_image, instanceinfo)
2512
2513     feedback_fn("* Other Notes")
2514     if i_non_redundant:
2515       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2516                   % len(i_non_redundant))
2517
2518     if i_non_a_balanced:
2519       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2520                   % len(i_non_a_balanced))
2521
2522     if n_offline:
2523       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2524
2525     if n_drained:
2526       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2527
2528     return not self.bad
2529
2530   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2531     """Analyze the post-hooks' result
2532
2533     This method analyses the hook result, handles it, and sends some
2534     nicely-formatted feedback back to the user.
2535
2536     @param phase: one of L{constants.HOOKS_PHASE_POST} or
2537         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2538     @param hooks_results: the results of the multi-node hooks rpc call
2539     @param feedback_fn: function used send feedback back to the caller
2540     @param lu_result: previous Exec result
2541     @return: the new Exec result, based on the previous result
2542         and hook results
2543
2544     """
2545     # We only really run POST phase hooks, and are only interested in
2546     # their results
2547     if phase == constants.HOOKS_PHASE_POST:
2548       # Used to change hooks' output to proper indentation
2549       feedback_fn("* Hooks Results")
2550       assert hooks_results, "invalid result from hooks"
2551
2552       for node_name in hooks_results:
2553         res = hooks_results[node_name]
2554         msg = res.fail_msg
2555         test = msg and not res.offline
2556         self._ErrorIf(test, self.ENODEHOOKS, node_name,
2557                       "Communication failure in hooks execution: %s", msg)
2558         if res.offline or msg:
2559           # No need to investigate payload if node is offline or gave an error.
2560           # override manually lu_result here as _ErrorIf only
2561           # overrides self.bad
2562           lu_result = 1
2563           continue
2564         for script, hkr, output in res.payload:
2565           test = hkr == constants.HKR_FAIL
2566           self._ErrorIf(test, self.ENODEHOOKS, node_name,
2567                         "Script %s failed, output:", script)
2568           if test:
2569             output = self._HOOKS_INDENT_RE.sub('      ', output)
2570             feedback_fn("%s" % output)
2571             lu_result = 0
2572
2573       return lu_result
2574
2575
2576 class LUClusterVerifyDisks(NoHooksLU):
2577   """Verifies the cluster disks status.
2578
2579   """
2580   REQ_BGL = False
2581
2582   def ExpandNames(self):
2583     self.needed_locks = {
2584       locking.LEVEL_NODE: locking.ALL_SET,
2585       locking.LEVEL_INSTANCE: locking.ALL_SET,
2586     }
2587     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2588
2589   def Exec(self, feedback_fn):
2590     """Verify integrity of cluster disks.
2591
2592     @rtype: tuple of three items
2593     @return: a tuple of (dict of node-to-node_error, list of instances
2594         which need activate-disks, dict of instance: (node, volume) for
2595         missing volumes
2596
2597     """
2598     result = res_nodes, res_instances, res_missing = {}, [], {}
2599
2600     nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
2601     instances = self.cfg.GetAllInstancesInfo().values()
2602
2603     nv_dict = {}
2604     for inst in instances:
2605       inst_lvs = {}
2606       if not inst.admin_up:
2607         continue
2608       inst.MapLVsByNode(inst_lvs)
2609       # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2610       for node, vol_list in inst_lvs.iteritems():
2611         for vol in vol_list:
2612           nv_dict[(node, vol)] = inst
2613
2614     if not nv_dict:
2615       return result
2616
2617     node_lvs = self.rpc.call_lv_list(nodes, [])
2618     for node, node_res in node_lvs.items():
2619       if node_res.offline:
2620         continue
2621       msg = node_res.fail_msg
2622       if msg:
2623         logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2624         res_nodes[node] = msg
2625         continue
2626
2627       lvs = node_res.payload
2628       for lv_name, (_, _, lv_online) in lvs.items():
2629         inst = nv_dict.pop((node, lv_name), None)
2630         if (not lv_online and inst is not None
2631             and inst.name not in res_instances):
2632           res_instances.append(inst.name)
2633
2634     # any leftover items in nv_dict are missing LVs, let's arrange the
2635     # data better
2636     for key, inst in nv_dict.iteritems():
2637       if inst.name not in res_missing:
2638         res_missing[inst.name] = []
2639       res_missing[inst.name].append(key)
2640
2641     return result
2642
2643
2644 class LUClusterRepairDiskSizes(NoHooksLU):
2645   """Verifies the cluster disks sizes.
2646
2647   """
2648   REQ_BGL = False
2649
2650   def ExpandNames(self):
2651     if self.op.instances:
2652       self.wanted_names = []
2653       for name in self.op.instances:
2654         full_name = _ExpandInstanceName(self.cfg, name)
2655         self.wanted_names.append(full_name)
2656       self.needed_locks = {
2657         locking.LEVEL_NODE: [],
2658         locking.LEVEL_INSTANCE: self.wanted_names,
2659         }
2660       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2661     else:
2662       self.wanted_names = None
2663       self.needed_locks = {
2664         locking.LEVEL_NODE: locking.ALL_SET,
2665         locking.LEVEL_INSTANCE: locking.ALL_SET,
2666         }
2667     self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2668
2669   def DeclareLocks(self, level):
2670     if level == locking.LEVEL_NODE and self.wanted_names is not None:
2671       self._LockInstancesNodes(primary_only=True)
2672
2673   def CheckPrereq(self):
2674     """Check prerequisites.
2675
2676     This only checks the optional instance list against the existing names.
2677
2678     """
2679     if self.wanted_names is None:
2680       self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)
2681
2682     self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2683                              in self.wanted_names]
2684
2685   def _EnsureChildSizes(self, disk):
2686     """Ensure children of the disk have the needed disk size.
2687
2688     This is valid mainly for DRBD8 and fixes an issue where the
2689     children have smaller disk size.
2690
2691     @param disk: an L{ganeti.objects.Disk} object
2692
2693     """
2694     if disk.dev_type == constants.LD_DRBD8:
2695       assert disk.children, "Empty children for DRBD8?"
2696       fchild = disk.children[0]
2697       mismatch = fchild.size < disk.size
2698       if mismatch:
2699         self.LogInfo("Child disk has size %d, parent %d, fixing",
2700                      fchild.size, disk.size)
2701         fchild.size = disk.size
2702
2703       # and we recurse on this child only, not on the metadev
2704       return self._EnsureChildSizes(fchild) or mismatch
2705     else:
2706       return False
2707
2708   def Exec(self, feedback_fn):
2709     """Verify the size of cluster disks.
2710
2711     """
2712     # TODO: check child disks too
2713     # TODO: check differences in size between primary/secondary nodes
2714     per_node_disks = {}
2715     for instance in self.wanted_instances:
2716       pnode = instance.primary_node
2717       if pnode not in per_node_disks:
2718         per_node_disks[pnode] = []
2719       for idx, disk in enumerate(instance.disks):
2720         per_node_disks[pnode].append((instance, idx, disk))
2721
2722     changed = []
2723     for node, dskl in per_node_disks.items():
2724       newl = [v[2].Copy() for v in dskl]
2725       for dsk in newl:
2726         self.cfg.SetDiskID(dsk, node)
2727       result = self.rpc.call_blockdev_getsize(node, newl)
2728       if result.fail_msg:
2729         self.LogWarning("Failure in blockdev_getsize call to node"
2730                         " %s, ignoring", node)
2731         continue
2732       if len(result.payload) != len(dskl):
2733         logging.warning("Invalid result from node %s: len(dksl)=%d,"
2734                         " result.payload=%s", node, len(dskl), result.payload)
2735         self.LogWarning("Invalid result from node %s, ignoring node results",
2736                         node)
2737         continue
2738       for ((instance, idx, disk), size) in zip(dskl, result.payload):
2739         if size is None:
2740           self.LogWarning("Disk %d of instance %s did not return size"
2741                           " information, ignoring", idx, instance.name)
2742           continue
2743         if not isinstance(size, (int, long)):
2744           self.LogWarning("Disk %d of instance %s did not return valid"
2745                           " size information, ignoring", idx, instance.name)
2746           continue
2747         size = size >> 20
2748         if size != disk.size:
2749           self.LogInfo("Disk %d of instance %s has mismatched size,"
2750                        " correcting: recorded %d, actual %d", idx,
2751                        instance.name, disk.size, size)
2752           disk.size = size
2753           self.cfg.Update(instance, feedback_fn)
2754           changed.append((instance.name, idx, size))
2755         if self._EnsureChildSizes(disk):
2756           self.cfg.Update(instance, feedback_fn)
2757           changed.append((instance.name, idx, disk.size))
2758     return changed
2759
2760
2761 class LUClusterRename(LogicalUnit):
2762   """Rename the cluster.
2763
2764   """
2765   HPATH = "cluster-rename"
2766   HTYPE = constants.HTYPE_CLUSTER
2767
2768   def BuildHooksEnv(self):
2769     """Build hooks env.
2770
2771     """
2772     return {
2773       "OP_TARGET": self.cfg.GetClusterName(),
2774       "NEW_NAME": self.op.name,
2775       }
2776
2777   def BuildHooksNodes(self):
2778     """Build hooks nodes.
2779
2780     """
2781     return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
2782
2783   def CheckPrereq(self):
2784     """Verify that the passed name is a valid one.
2785
2786     """
2787     hostname = netutils.GetHostname(name=self.op.name,
2788                                     family=self.cfg.GetPrimaryIPFamily())
2789
2790     new_name = hostname.name
2791     self.ip = new_ip = hostname.ip
2792     old_name = self.cfg.GetClusterName()
2793     old_ip = self.cfg.GetMasterIP()
2794     if new_name == old_name and new_ip == old_ip:
2795       raise errors.OpPrereqError("Neither the name nor the IP address of the"
2796                                  " cluster has changed",
2797                                  errors.ECODE_INVAL)
2798     if new_ip != old_ip:
2799       if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2800         raise errors.OpPrereqError("The given cluster IP address (%s) is"
2801                                    " reachable on the network" %
2802                                    new_ip, errors.ECODE_NOTUNIQUE)
2803
2804     self.op.name = new_name
2805
2806   def Exec(self, feedback_fn):
2807     """Rename the cluster.
2808
2809     """
2810     clustername = self.op.name
2811     ip = self.ip
2812
2813     # shutdown the master IP
2814     master = self.cfg.GetMasterNode()
2815     result = self.rpc.call_node_stop_master(master, False)
2816     result.Raise("Could not disable the master role")
2817
2818     try:
2819       cluster = self.cfg.GetClusterInfo()
2820       cluster.cluster_name = clustername
2821       cluster.master_ip = ip
2822       self.cfg.Update(cluster, feedback_fn)
2823
2824       # update the known hosts file
2825       ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2826       node_list = self.cfg.GetOnlineNodeList()
2827       try:
2828         node_list.remove(master)
2829       except ValueError:
2830         pass
2831       _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
2832     finally:
2833       result = self.rpc.call_node_start_master(master, False, False)
2834       msg = result.fail_msg
2835       if msg:
2836         self.LogWarning("Could not re-enable the master role on"
2837                         " the master, please restart manually: %s", msg)
2838
2839     return clustername
2840
2841
2842 class LUClusterSetParams(LogicalUnit):
2843   """Change the parameters of the cluster.
2844
2845   """
2846   HPATH = "cluster-modify"
2847   HTYPE = constants.HTYPE_CLUSTER
2848   REQ_BGL = False
2849
2850   def CheckArguments(self):
2851     """Check parameters
2852
2853     """
2854     if self.op.uid_pool:
2855       uidpool.CheckUidPool(self.op.uid_pool)
2856
2857     if self.op.add_uids:
2858       uidpool.CheckUidPool(self.op.add_uids)
2859
2860     if self.op.remove_uids:
2861       uidpool.CheckUidPool(self.op.remove_uids)
2862
2863   def ExpandNames(self):
2864     # FIXME: in the future maybe other cluster params won't require checking on
2865     # all nodes to be modified.
2866     self.needed_locks = {
2867       locking.LEVEL_NODE: locking.ALL_SET,
2868     }
2869     self.share_locks[locking.LEVEL_NODE] = 1
2870
2871   def BuildHooksEnv(self):
2872     """Build hooks env.
2873
2874     """
2875     return {
2876       "OP_TARGET": self.cfg.GetClusterName(),
2877       "NEW_VG_NAME": self.op.vg_name,
2878       }
2879
2880   def BuildHooksNodes(self):
2881     """Build hooks nodes.
2882
2883     """
2884     mn = self.cfg.GetMasterNode()
2885     return ([mn], [mn])
2886
2887   def CheckPrereq(self):
2888     """Check prerequisites.
2889
2890     This checks whether the given params don't conflict and
2891     if the given volume group is valid.
2892
2893     """
2894     if self.op.vg_name is not None and not self.op.vg_name:
2895       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2896         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2897                                    " instances exist", errors.ECODE_INVAL)
2898
2899     if self.op.drbd_helper is not None and not self.op.drbd_helper:
2900       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2901         raise errors.OpPrereqError("Cannot disable drbd helper while"
2902                                    " drbd-based instances exist",
2903                                    errors.ECODE_INVAL)
2904
2905     node_list = self.glm.list_owned(locking.LEVEL_NODE)
2906
2907     # if vg_name not None, checks given volume group on all nodes
2908     if self.op.vg_name:
2909       vglist = self.rpc.call_vg_list(node_list)
2910       for node in node_list:
2911         msg = vglist[node].fail_msg
2912         if msg:
2913           # ignoring down node
2914           self.LogWarning("Error while gathering data on node %s"
2915                           " (ignoring node): %s", node, msg)
2916           continue
2917         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2918                                               self.op.vg_name,
2919                                               constants.MIN_VG_SIZE)
2920         if vgstatus:
2921           raise errors.OpPrereqError("Error on node '%s': %s" %
2922                                      (node, vgstatus), errors.ECODE_ENVIRON)
2923
2924     if self.op.drbd_helper:
2925       # checks given drbd helper on all nodes
2926       helpers = self.rpc.call_drbd_helper(node_list)
2927       for node in node_list:
2928         ninfo = self.cfg.GetNodeInfo(node)
2929         if ninfo.offline:
2930           self.LogInfo("Not checking drbd helper on offline node %s", node)
2931           continue
2932         msg = helpers[node].fail_msg
2933         if msg:
2934           raise errors.OpPrereqError("Error checking drbd helper on node"
2935                                      " '%s': %s" % (node, msg),
2936                                      errors.ECODE_ENVIRON)
2937         node_helper = helpers[node].payload
2938         if node_helper != self.op.drbd_helper:
2939           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2940                                      (node, node_helper), errors.ECODE_ENVIRON)
2941
2942     self.cluster = cluster = self.cfg.GetClusterInfo()
2943     # validate params changes
2944     if self.op.beparams:
2945       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2946       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2947
2948     if self.op.ndparams:
2949       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
2950       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
2951
2952       # TODO: we need a more general way to handle resetting
2953       # cluster-level parameters to default values
2954       if self.new_ndparams["oob_program"] == "":
2955         self.new_ndparams["oob_program"] = \
2956             constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
2957
2958     if self.op.nicparams:
2959       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2960       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2961       objects.NIC.CheckParameterSyntax(self.new_nicparams)
2962       nic_errors = []
2963
2964       # check all instances for consistency
2965       for instance in self.cfg.GetAllInstancesInfo().values():
2966         for nic_idx, nic in enumerate(instance.nics):
2967           params_copy = copy.deepcopy(nic.nicparams)
2968           params_filled = objects.FillDict(self.new_nicparams, params_copy)
2969
2970           # check parameter syntax
2971           try:
2972             objects.NIC.CheckParameterSyntax(params_filled)
2973           except errors.ConfigurationError, err:
2974             nic_errors.append("Instance %s, nic/%d: %s" %
2975                               (instance.name, nic_idx, err))
2976
2977           # if we're moving instances to routed, check that they have an ip
2978           target_mode = params_filled[constants.NIC_MODE]
2979           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2980             nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2981                               (instance.name, nic_idx))
2982       if nic_errors:
2983         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2984                                    "\n".join(nic_errors))
2985
2986     # hypervisor list/parameters
2987     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2988     if self.op.hvparams:
2989       for hv_name, hv_dict in self.op.hvparams.items():
2990         if hv_name not in self.new_hvparams:
2991           self.new_hvparams[hv_name] = hv_dict
2992         else:
2993           self.new_hvparams[hv_name].update(hv_dict)
2994
2995     # os hypervisor parameters
2996     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2997     if self.op.os_hvp:
2998       for os_name, hvs in self.op.os_hvp.items():
2999         if os_name not in self.new_os_hvp:
3000           self.new_os_hvp[os_name] = hvs
3001         else:
3002           for hv_name, hv_dict in hvs.items():
3003             if hv_name not in self.new_os_hvp[os_name]:
3004               self.new_os_hvp[os_name][hv_name] = hv_dict
3005             else:
3006               self.new_os_hvp[os_name][hv_name].update(hv_dict)
3007
3008     # os parameters
3009     self.new_osp = objects.FillDict(cluster.osparams, {})
3010     if self.op.osparams:
3011       for os_name, osp in self.op.osparams.items():
3012         if os_name not in self.new_osp:
3013           self.new_osp[os_name] = {}
3014
3015         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3016                                                   use_none=True)
3017
3018         if not self.new_osp[os_name]:
3019           # we removed all parameters
3020           del self.new_osp[os_name]
3021         else:
3022           # check the parameter validity (remote check)
3023           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3024                          os_name, self.new_osp[os_name])
3025
3026     # changes to the hypervisor list
3027     if self.op.enabled_hypervisors is not None:
3028       self.hv_list = self.op.enabled_hypervisors
3029       for hv in self.hv_list:
3030         # if the hypervisor doesn't already exist in the cluster
3031         # hvparams, we initialize it to empty, and then (in both
3032         # cases) we make sure to fill the defaults, as we might not
3033         # have a complete defaults list if the hypervisor wasn't
3034         # enabled before
3035         if hv not in new_hvp:
3036           new_hvp[hv] = {}
3037         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3038         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3039     else:
3040       self.hv_list = cluster.enabled_hypervisors
3041
3042     if self.op.hvparams or self.op.enabled_hypervisors is not None:
3043       # either the enabled list has changed, or the parameters have, validate
3044       for hv_name, hv_params in self.new_hvparams.items():
3045         if ((self.op.hvparams and hv_name in self.op.hvparams) or
3046             (self.op.enabled_hypervisors and
3047              hv_name in self.op.enabled_hypervisors)):
3048           # either this is a new hypervisor, or its parameters have changed
3049           hv_class = hypervisor.GetHypervisor(hv_name)
3050           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3051           hv_class.CheckParameterSyntax(hv_params)
3052           _CheckHVParams(self, node_list, hv_name, hv_params)
3053
3054     if self.op.os_hvp:
3055       # no need to check any newly-enabled hypervisors, since the
3056       # defaults have already been checked in the above code-block
3057       for os_name, os_hvp in self.new_os_hvp.items():
3058         for hv_name, hv_params in os_hvp.items():
3059           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3060           # we need to fill in the new os_hvp on top of the actual hv_p
3061           cluster_defaults = self.new_hvparams.get(hv_name, {})
3062           new_osp = objects.FillDict(cluster_defaults, hv_params)
3063           hv_class = hypervisor.GetHypervisor(hv_name)
3064           hv_class.CheckParameterSyntax(new_osp)
3065           _CheckHVParams(self, node_list, hv_name, new_osp)
3066
3067     if self.op.default_iallocator:
3068       alloc_script = utils.FindFile(self.op.default_iallocator,
3069                                     constants.IALLOCATOR_SEARCH_PATH,
3070                                     os.path.isfile)
3071       if alloc_script is None:
3072         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3073                                    " specified" % self.op.default_iallocator,
3074                                    errors.ECODE_INVAL)
3075
3076   def Exec(self, feedback_fn):
3077     """Change the parameters of the cluster.
3078
3079     """
3080     if self.op.vg_name is not None:
3081       new_volume = self.op.vg_name
3082       if not new_volume:
3083         new_volume = None
3084       if new_volume != self.cfg.GetVGName():
3085         self.cfg.SetVGName(new_volume)
3086       else:
3087         feedback_fn("Cluster LVM configuration already in desired"
3088                     " state, not changing")
3089     if self.op.drbd_helper is not None:
3090       new_helper = self.op.drbd_helper
3091       if not new_helper:
3092         new_helper = None
3093       if new_helper != self.cfg.GetDRBDHelper():
3094         self.cfg.SetDRBDHelper(new_helper)
3095       else:
3096         feedback_fn("Cluster DRBD helper already in desired state,"
3097                     " not changing")
3098     if self.op.hvparams:
3099       self.cluster.hvparams = self.new_hvparams
3100     if self.op.os_hvp:
3101       self.cluster.os_hvp = self.new_os_hvp
3102     if self.op.enabled_hypervisors is not None:
3103       self.cluster.hvparams = self.new_hvparams
3104       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3105     if self.op.beparams:
3106       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3107     if self.op.nicparams:
3108       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3109     if self.op.osparams:
3110       self.cluster.osparams = self.new_osp
3111     if self.op.ndparams:
3112       self.cluster.ndparams = self.new_ndparams
3113
3114     if self.op.candidate_pool_size is not None:
3115       self.cluster.candidate_pool_size = self.op.candidate_pool_size
3116       # we need to update the pool size here, otherwise the save will fail
3117       _AdjustCandidatePool(self, [])
3118
3119     if self.op.maintain_node_health is not None:
3120       self.cluster.maintain_node_health = self.op.maintain_node_health
3121
3122     if self.op.prealloc_wipe_disks is not None:
3123       self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3124
3125     if self.op.add_uids is not None:
3126       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3127
3128     if self.op.remove_uids is not None:
3129       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3130
3131     if self.op.uid_pool is not None:
3132       self.cluster.uid_pool = self.op.uid_pool
3133
3134     if self.op.default_iallocator is not None:
3135       self.cluster.default_iallocator = self.op.default_iallocator
3136
3137     if self.op.reserved_lvs is not None:
3138       self.cluster.reserved_lvs = self.op.reserved_lvs
3139
3140     def helper_os(aname, mods, desc):
3141       desc += " OS list"
3142       lst = getattr(self.cluster, aname)
3143       for key, val in mods:
3144         if key == constants.DDM_ADD:
3145           if val in lst:
3146             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3147           else:
3148             lst.append(val)
3149         elif key == constants.DDM_REMOVE:
3150           if val in lst:
3151             lst.remove(val)
3152           else:
3153             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3154         else:
3155           raise errors.ProgrammerError("Invalid modification '%s'" % key)
3156
3157     if self.op.hidden_os:
3158       helper_os("hidden_os", self.op.hidden_os, "hidden")
3159
3160     if self.op.blacklisted_os:
3161       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3162
3163     if self.op.master_netdev:
3164       master = self.cfg.GetMasterNode()
3165       feedback_fn("Shutting down master ip on the current netdev (%s)" %
3166                   self.cluster.master_netdev)
3167       result = self.rpc.call_node_stop_master(master, False)
3168       result.Raise("Could not disable the master ip")
3169       feedback_fn("Changing master_netdev from %s to %s" %
3170                   (self.cluster.master_netdev, self.op.master_netdev))
3171       self.cluster.master_netdev = self.op.master_netdev
3172
3173     self.cfg.Update(self.cluster, feedback_fn)
3174
3175     if self.op.master_netdev:
3176       feedback_fn("Starting the master ip on the new master netdev (%s)" %
3177                   self.op.master_netdev)
3178       result = self.rpc.call_node_start_master(master, False, False)
3179       if result.fail_msg:
3180         self.LogWarning("Could not re-enable the master ip on"
3181                         " the master, please restart manually: %s",
3182                         result.fail_msg)
3183
3184
3185 def _UploadHelper(lu, nodes, fname):
3186   """Helper for uploading a file and showing warnings.
3187
3188   """
3189   if os.path.exists(fname):
3190     result = lu.rpc.call_upload_file(nodes, fname)
3191     for to_node, to_result in result.items():
3192       msg = to_result.fail_msg
3193       if msg:
3194         msg = ("Copy of file %s to node %s failed: %s" %
3195                (fname, to_node, msg))
3196         lu.proc.LogWarning(msg)
3197
3198
3199 def _ComputeAncillaryFiles(cluster, redist):
3200   """Compute files external to Ganeti which need to be consistent.
3201
3202   @type redist: boolean
3203   @param redist: Whether to include files which need to be redistributed
3204
3205   """
3206   # Compute files for all nodes
3207   files_all = set([
3208     constants.SSH_KNOWN_HOSTS_FILE,
3209     constants.CONFD_HMAC_KEY,
3210     constants.CLUSTER_DOMAIN_SECRET_FILE,
3211     ])
3212
3213   if not redist:
3214     files_all.update(constants.ALL_CERT_FILES)
3215     files_all.update(ssconf.SimpleStore().GetFileList())
3216
3217   if cluster.modify_etc_hosts:
3218     files_all.add(constants.ETC_HOSTS)
3219
3220   # Files which must either exist on all nodes or on none
3221   files_all_opt = set([
3222     constants.RAPI_USERS_FILE,
3223     ])
3224
3225   # Files which should only be on master candidates
3226   files_mc = set()
3227   if not redist:
3228     files_mc.add(constants.CLUSTER_CONF_FILE)
3229
3230   # Files which should only be on VM-capable nodes
3231   files_vm = set(filename
3232     for hv_name in cluster.enabled_hypervisors
3233     for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
3234
3235   # Filenames must be unique
3236   assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3237           sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3238          "Found file listed in more than one file list"
3239
3240   return (files_all, files_all_opt, files_mc, files_vm)
3241
3242
3243 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3244   """Distribute additional files which are part of the cluster configuration.
3245
3246   ConfigWriter takes care of distributing the config and ssconf files, but
3247   there are more files which should be distributed to all nodes. This function
3248   makes sure those are copied.
3249
3250   @param lu: calling logical unit
3251   @param additional_nodes: list of nodes not in the config to distribute to
3252   @type additional_vm: boolean
3253   @param additional_vm: whether the additional nodes are vm-capable or not
3254
3255   """
3256   # Gather target nodes
3257   cluster = lu.cfg.GetClusterInfo()
3258   master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3259
3260   online_nodes = lu.cfg.GetOnlineNodeList()
3261   vm_nodes = lu.cfg.GetVmCapableNodeList()
3262
3263   if additional_nodes is not None:
3264     online_nodes.extend(additional_nodes)
3265     if additional_vm:
3266       vm_nodes.extend(additional_nodes)
3267
3268   # Never distribute to master node
3269   for nodelist in [online_nodes, vm_nodes]:
3270     if master_info.name in nodelist:
3271       nodelist.remove(master_info.name)
3272
3273   # Gather file lists
3274   (files_all, files_all_opt, files_mc, files_vm) = \
3275     _ComputeAncillaryFiles(cluster, True)
3276
3277   # Never re-distribute configuration file from here
3278   assert not (constants.CLUSTER_CONF_FILE in files_all or
3279               constants.CLUSTER_CONF_FILE in files_vm)
3280   assert not files_mc, "Master candidates not handled in this function"
3281
3282   filemap = [
3283     (online_nodes, files_all),
3284     (online_nodes, files_all_opt),
3285     (vm_nodes, files_vm),
3286     ]
3287
3288   # Upload the files
3289   for (node_list, files) in filemap:
3290     for fname in files:
3291       _UploadHelper(lu, node_list, fname)
3292
3293
3294 class LUClusterRedistConf(NoHooksLU):
3295   """Force the redistribution of cluster configuration.
3296
3297   This is a very simple LU.
3298
3299   """
3300   REQ_BGL = False
3301
3302   def ExpandNames(self):
3303     self.needed_locks = {
3304       locking.LEVEL_NODE: locking.ALL_SET,
3305     }
3306     self.share_locks[locking.LEVEL_NODE] = 1
3307
3308   def Exec(self, feedback_fn):
3309     """Redistribute the configuration.
3310
3311     """
3312     self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3313     _RedistributeAncillaryFiles(self)
3314
3315
3316 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3317   """Sleep and poll for an instance's disk to sync.
3318
3319   """
3320   if not instance.disks or disks is not None and not disks:
3321     return True
3322
3323   disks = _ExpandCheckDisks(instance, disks)
3324
3325   if not oneshot:
3326     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3327
3328   node = instance.primary_node
3329
3330   for dev in disks:
3331     lu.cfg.SetDiskID(dev, node)
3332
3333   # TODO: Convert to utils.Retry
3334
3335   retries = 0
3336   degr_retries = 10 # in seconds, as we sleep 1 second each time
3337   while True:
3338     max_time = 0
3339     done = True
3340     cumul_degraded = False
3341     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3342     msg = rstats.fail_msg
3343     if msg:
3344       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3345       retries += 1
3346       if retries >= 10:
3347         raise errors.RemoteError("Can't contact node %s for mirror data,"
3348                                  " aborting." % node)
3349       time.sleep(6)
3350       continue
3351     rstats = rstats.payload
3352     retries = 0
3353     for i, mstat in enumerate(rstats):
3354       if mstat is None:
3355         lu.LogWarning("Can't compute data for node %s/%s",
3356                            node, disks[i].iv_name)
3357         continue
3358
3359       cumul_degraded = (cumul_degraded or
3360                         (mstat.is_degraded and mstat.sync_percent is None))
3361       if mstat.sync_percent is not None:
3362         done = False
3363         if mstat.estimated_time is not None:
3364           rem_time = ("%s remaining (estimated)" %
3365                       utils.FormatSeconds(mstat.estimated_time))
3366           max_time = mstat.estimated_time
3367         else:
3368           rem_time = "no time estimate"
3369         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3370                         (disks[i].iv_name, mstat.sync_percent, rem_time))
3371
3372     # if we're done but degraded, let's do a few small retries, to
3373     # make sure we see a stable and not transient situation; therefore
3374     # we force restart of the loop
3375     if (done or oneshot) and cumul_degraded and degr_retries > 0:
3376       logging.info("Degraded disks found, %d retries left", degr_retries)
3377       degr_retries -= 1
3378       time.sleep(1)
3379       continue
3380
3381     if done or oneshot:
3382       break
3383
3384     time.sleep(min(60, max_time))
3385
3386   if done:
3387     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3388   return not cumul_degraded
3389
3390
3391 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3392   """Check that mirrors are not degraded.
3393
3394   The ldisk parameter, if True, will change the test from the
3395   is_degraded attribute (which represents overall non-ok status for
3396   the device(s)) to the ldisk (representing the local storage status).
3397
3398   """
3399   lu.cfg.SetDiskID(dev, node)
3400
3401   result = True
3402
3403   if on_primary or dev.AssembleOnSecondary():
3404     rstats = lu.rpc.call_blockdev_find(node, dev)
3405     msg = rstats.fail_msg
3406     if msg:
3407       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3408       result = False
3409     elif not rstats.payload:
3410       lu.LogWarning("Can't find disk on node %s", node)
3411       result = False
3412     else:
3413       if ldisk:
3414         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3415       else:
3416         result = result and not rstats.payload.is_degraded
3417
3418   if dev.children:
3419     for child in dev.children:
3420       result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3421
3422   return result
3423
3424
3425 class LUOobCommand(NoHooksLU):
3426   """Logical unit for OOB handling.
3427
3428   """
3429   REG_BGL = False
3430   _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3431
3432   def CheckPrereq(self):
3433     """Check prerequisites.
3434
3435     This checks:
3436      - the node exists in the configuration
3437      - OOB is supported
3438
3439     Any errors are signaled by raising errors.OpPrereqError.
3440
3441     """
3442     self.nodes = []
3443     self.master_node = self.cfg.GetMasterNode()
3444
3445     assert self.op.power_delay >= 0.0
3446
3447     if self.op.node_names:
3448       if (self.op.command in self._SKIP_MASTER and
3449           self.master_node in self.op.node_names):
3450         master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3451         master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3452
3453         if master_oob_handler:
3454           additional_text = ("run '%s %s %s' if you want to operate on the"
3455                              " master regardless") % (master_oob_handler,
3456                                                       self.op.command,
3457                                                       self.master_node)
3458         else:
3459           additional_text = "it does not support out-of-band operations"
3460
3461         raise errors.OpPrereqError(("Operating on the master node %s is not"
3462                                     " allowed for %s; %s") %
3463                                    (self.master_node, self.op.command,
3464                                     additional_text), errors.ECODE_INVAL)
3465     else:
3466       self.op.node_names = self.cfg.GetNodeList()
3467       if self.op.command in self._SKIP_MASTER:
3468         self.op.node_names.remove(self.master_node)
3469
3470     if self.op.command in self._SKIP_MASTER:
3471       assert self.master_node not in self.op.node_names
3472
3473     for node_name in self.op.node_names:
3474       node = self.cfg.GetNodeInfo(node_name)
3475
3476       if node is None:
3477         raise errors.OpPrereqError("Node %s not found" % node_name,
3478                                    errors.ECODE_NOENT)
3479       else:
3480         self.nodes.append(node)
3481
3482       if (not self.op.ignore_status and
3483           (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3484         raise errors.OpPrereqError(("Cannot power off node %s because it is"
3485                                     " not marked offline") % node_name,
3486                                    errors.ECODE_STATE)
3487
3488   def ExpandNames(self):
3489     """Gather locks we need.
3490
3491     """
3492     if self.op.node_names:
3493       self.op.node_names = [_ExpandNodeName(self.cfg, name)
3494                             for name in self.op.node_names]
3495       lock_names = self.op.node_names
3496     else:
3497       lock_names = locking.ALL_SET
3498
3499     self.needed_locks = {
3500       locking.LEVEL_NODE: lock_names,
3501       }
3502
3503   def Exec(self, feedback_fn):
3504     """Execute OOB and return result if we expect any.
3505
3506     """
3507     master_node = self.master_node
3508     ret = []
3509
3510     for idx, node in enumerate(self.nodes):
3511       node_entry = [(constants.RS_NORMAL, node.name)]
3512       ret.append(node_entry)
3513
3514       oob_program = _SupportsOob(self.cfg, node)
3515
3516       if not oob_program:
3517         node_entry.append((constants.RS_UNAVAIL, None))
3518         continue
3519
3520       logging.info("Executing out-of-band command '%s' using '%s' on %s",
3521                    self.op.command, oob_program, node.name)
3522       result = self.rpc.call_run_oob(master_node, oob_program,
3523                                      self.op.command, node.name,
3524                                      self.op.timeout)
3525
3526       if result.fail_msg:
3527         self.LogWarning("Out-of-band RPC failed on node '%s': %s",
3528                         node.name, result.fail_msg)
3529         node_entry.append((constants.RS_NODATA, None))
3530       else:
3531         try:
3532           self._CheckPayload(result)
3533         except errors.OpExecError, err:
3534           self.LogWarning("Payload returned by node '%s' is not valid: %s",
3535                           node.name, err)
3536           node_entry.append((constants.RS_NODATA, None))
3537         else:
3538           if self.op.command == constants.OOB_HEALTH:
3539             # For health we should log important events
3540             for item, status in result.payload:
3541               if status in [constants.OOB_STATUS_WARNING,
3542                             constants.OOB_STATUS_CRITICAL]:
3543                 self.LogWarning("Item '%s' on node '%s' has status '%s'",
3544                                 item, node.name, status)
3545
3546           if self.op.command == constants.OOB_POWER_ON:
3547             node.powered = True
3548           elif self.op.command == constants.OOB_POWER_OFF:
3549             node.powered = False
3550           elif self.op.command == constants.OOB_POWER_STATUS:
3551             powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3552             if powered != node.powered:
3553               logging.warning(("Recorded power state (%s) of node '%s' does not"
3554                                " match actual power state (%s)"), node.powered,
3555                               node.name, powered)
3556
3557           # For configuration changing commands we should update the node
3558           if self.op.command in (constants.OOB_POWER_ON,
3559                                  constants.OOB_POWER_OFF):
3560             self.cfg.Update(node, feedback_fn)
3561
3562           node_entry.append((constants.RS_NORMAL, result.payload))
3563
3564           if (self.op.command == constants.OOB_POWER_ON and
3565               idx < len(self.nodes) - 1):
3566             time.sleep(self.op.power_delay)
3567
3568     return ret
3569
3570   def _CheckPayload(self, result):
3571     """Checks if the payload is valid.
3572
3573     @param result: RPC result
3574     @raises errors.OpExecError: If payload is not valid
3575
3576     """
3577     errs = []
3578     if self.op.command == constants.OOB_HEALTH:
3579       if not isinstance(result.payload, list):
3580         errs.append("command 'health' is expected to return a list but got %s" %
3581                     type(result.payload))
3582       else:
3583         for item, status in result.payload:
3584           if status not in constants.OOB_STATUSES:
3585             errs.append("health item '%s' has invalid status '%s'" %
3586                         (item, status))
3587
3588     if self.op.command == constants.OOB_POWER_STATUS:
3589       if not isinstance(result.payload, dict):
3590         errs.append("power-status is expected to return a dict but got %s" %
3591                     type(result.payload))
3592
3593     if self.op.command in [
3594         constants.OOB_POWER_ON,
3595         constants.OOB_POWER_OFF,
3596         constants.OOB_POWER_CYCLE,
3597         ]:
3598       if result.payload is not None:
3599         errs.append("%s is expected to not return payload but got '%s'" %
3600                     (self.op.command, result.payload))
3601
3602     if errs:
3603       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3604                                utils.CommaJoin(errs))
3605
3606 class _OsQuery(_QueryBase):
3607   FIELDS = query.OS_FIELDS
3608
3609   def ExpandNames(self, lu):
3610     # Lock all nodes in shared mode
3611     # Temporary removal of locks, should be reverted later
3612     # TODO: reintroduce locks when they are lighter-weight
3613     lu.needed_locks = {}
3614     #self.share_locks[locking.LEVEL_NODE] = 1
3615     #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3616
3617     # The following variables interact with _QueryBase._GetNames
3618     if self.names:
3619       self.wanted = self.names
3620     else:
3621       self.wanted = locking.ALL_SET
3622
3623     self.do_locking = self.use_locking
3624
3625   def DeclareLocks(self, lu, level):
3626     pass
3627
3628   @staticmethod
3629   def _DiagnoseByOS(rlist):
3630     """Remaps a per-node return list into an a per-os per-node dictionary
3631
3632     @param rlist: a map with node names as keys and OS objects as values
3633
3634     @rtype: dict
3635     @return: a dictionary with osnames as keys and as value another
3636         map, with nodes as keys and tuples of (path, status, diagnose,
3637         variants, parameters, api_versions) as values, eg::
3638
3639           {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3640                                      (/srv/..., False, "invalid api")],
3641                            "node2": [(/srv/..., True, "", [], [])]}
3642           }
3643
3644     """
3645     all_os = {}
3646     # we build here the list of nodes that didn't fail the RPC (at RPC
3647     # level), so that nodes with a non-responding node daemon don't
3648     # make all OSes invalid
3649     good_nodes = [node_name for node_name in rlist
3650                   if not rlist[node_name].fail_msg]
3651     for node_name, nr in rlist.items():
3652       if nr.fail_msg or not nr.payload:
3653         continue
3654       for (name, path, status, diagnose, variants,
3655            params, api_versions) in nr.payload:
3656         if name not in all_os:
3657           # build a list of nodes for this os containing empty lists
3658           # for each node in node_list
3659           all_os[name] = {}
3660           for nname in good_nodes:
3661             all_os[name][nname] = []
3662         # convert params from [name, help] to (name, help)
3663         params = [tuple(v) for v in params]
3664         all_os[name][node_name].append((path, status, diagnose,
3665                                         variants, params, api_versions))
3666     return all_os
3667
3668   def _GetQueryData(self, lu):
3669     """Computes the list of nodes and their attributes.
3670
3671     """
3672     # Locking is not used
3673     assert not (compat.any(lu.glm.is_owned(level)
3674                            for level in locking.LEVELS) or
3675                 self.do_locking or self.use_locking)
3676
3677     valid_nodes = [node.name
3678                    for node in lu.cfg.GetAllNodesInfo().values()
3679                    if not node.offline and node.vm_capable]
3680     pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
3681     cluster = lu.cfg.GetClusterInfo()
3682
3683     data = {}
3684
3685     for (os_name, os_data) in pol.items():
3686       info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
3687                           hidden=(os_name in cluster.hidden_os),
3688                           blacklisted=(os_name in cluster.blacklisted_os))
3689
3690       variants = set()
3691       parameters = set()
3692       api_versions = set()
3693
3694       for idx, osl in enumerate(os_data.values()):
3695         info.valid = bool(info.valid and osl and osl[0][1])
3696         if not info.valid:
3697           break
3698
3699         (node_variants, node_params, node_api) = osl[0][3:6]
3700         if idx == 0:
3701           # First entry
3702           variants.update(node_variants)
3703           parameters.update(node_params)
3704           api_versions.update(node_api)
3705         else:
3706           # Filter out inconsistent values
3707           variants.intersection_update(node_variants)
3708           parameters.intersection_update(node_params)
3709           api_versions.intersection_update(node_api)
3710
3711       info.variants = list(variants)
3712       info.parameters = list(parameters)
3713       info.api_versions = list(api_versions)
3714
3715       data[os_name] = info
3716
3717     # Prepare data in requested order
3718     return [data[name] for name in self._GetNames(lu, pol.keys(), None)
3719             if name in data]
3720
3721
3722 class LUOsDiagnose(NoHooksLU):
3723   """Logical unit for OS diagnose/query.
3724
3725   """
3726   REQ_BGL = False
3727
3728   @staticmethod
3729   def _BuildFilter(fields, names):
3730     """Builds a filter for querying OSes.
3731
3732     """
3733     name_filter = qlang.MakeSimpleFilter("name", names)
3734
3735     # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
3736     # respective field is not requested
3737     status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
3738                      for fname in ["hidden", "blacklisted"]
3739                      if fname not in fields]
3740     if "valid" not in fields:
3741       status_filter.append([qlang.OP_TRUE, "valid"])
3742
3743     if status_filter:
3744       status_filter.insert(0, qlang.OP_AND)
3745     else:
3746       status_filter = None
3747
3748     if name_filter and status_filter:
3749       return [qlang.OP_AND, name_filter, status_filter]
3750     elif name_filter:
3751       return name_filter
3752     else:
3753       return status_filter
3754
3755   def CheckArguments(self):
3756     self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
3757                        self.op.output_fields, False)
3758
3759   def ExpandNames(self):
3760     self.oq.ExpandNames(self)
3761
3762   def Exec(self, feedback_fn):
3763     return self.oq.OldStyleQuery(self)
3764
3765
3766 class LUNodeRemove(LogicalUnit):
3767   """Logical unit for removing a node.
3768
3769   """
3770   HPATH = "node-remove"
3771   HTYPE = constants.HTYPE_NODE
3772
3773   def BuildHooksEnv(self):
3774     """Build hooks env.
3775
3776     This doesn't run on the target node in the pre phase as a failed
3777     node would then be impossible to remove.
3778
3779     """
3780     return {
3781       "OP_TARGET": self.op.node_name,
3782       "NODE_NAME": self.op.node_name,
3783       }
3784
3785   def BuildHooksNodes(self):
3786     """Build hooks nodes.
3787
3788     """
3789     all_nodes = self.cfg.GetNodeList()
3790     try:
3791       all_nodes.remove(self.op.node_name)
3792     except ValueError:
3793       logging.warning("Node '%s', which is about to be removed, was not found"
3794                       " in the list of all nodes", self.op.node_name)
3795     return (all_nodes, all_nodes)
3796
3797   def CheckPrereq(self):
3798     """Check prerequisites.
3799
3800     This checks:
3801      - the node exists in the configuration
3802      - it does not have primary or secondary instances
3803      - it's not the master
3804
3805     Any errors are signaled by raising errors.OpPrereqError.
3806
3807     """
3808     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3809     node = self.cfg.GetNodeInfo(self.op.node_name)
3810     assert node is not None
3811
3812     instance_list = self.cfg.GetInstanceList()
3813
3814     masternode = self.cfg.GetMasterNode()
3815     if node.name == masternode:
3816       raise errors.OpPrereqError("Node is the master node, failover to another"
3817                                  " node is required", errors.ECODE_INVAL)
3818
3819     for instance_name in instance_list:
3820       instance = self.cfg.GetInstanceInfo(instance_name)
3821       if node.name in instance.all_nodes:
3822         raise errors.OpPrereqError("Instance %s is still running on the node,"
3823                                    " please remove first" % instance_name,
3824                                    errors.ECODE_INVAL)
3825     self.op.node_name = node.name
3826     self.node = node
3827
3828   def Exec(self, feedback_fn):
3829     """Removes the node from the cluster.
3830
3831     """
3832     node = self.node
3833     logging.info("Stopping the node daemon and removing configs from node %s",
3834                  node.name)
3835
3836     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3837
3838     # Promote nodes to master candidate as needed
3839     _AdjustCandidatePool(self, exceptions=[node.name])
3840     self.context.RemoveNode(node.name)
3841
3842     # Run post hooks on the node before it's removed
3843     _RunPostHook(self, node.name)
3844
3845     result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3846     msg = result.fail_msg
3847     if msg:
3848       self.LogWarning("Errors encountered on the remote node while leaving"
3849                       " the cluster: %s", msg)
3850
3851     # Remove node from our /etc/hosts
3852     if self.cfg.GetClusterInfo().modify_etc_hosts:
3853       master_node = self.cfg.GetMasterNode()
3854       result = self.rpc.call_etc_hosts_modify(master_node,
3855                                               constants.ETC_HOSTS_REMOVE,
3856                                               node.name, None)
3857       result.Raise("Can't update hosts file with new host data")
3858       _RedistributeAncillaryFiles(self)
3859
3860
3861 class _NodeQuery(_QueryBase):
3862   FIELDS = query.NODE_FIELDS
3863
3864   def ExpandNames(self, lu):
3865     lu.needed_locks = {}
3866     lu.share_locks[locking.LEVEL_NODE] = 1
3867
3868     if self.names:
3869       self.wanted = _GetWantedNodes(lu, self.names)
3870     else:
3871       self.wanted = locking.ALL_SET
3872
3873     self.do_locking = (self.use_locking and
3874                        query.NQ_LIVE in self.requested_data)
3875
3876     if self.do_locking:
3877       # if we don't request only static fields, we need to lock the nodes
3878       lu.needed_locks[locking.LEVEL_NODE] = self.wanted
3879
3880   def DeclareLocks(self, lu, level):
3881     pass
3882
3883   def _GetQueryData(self, lu):
3884     """Computes the list of nodes and their attributes.
3885
3886     """
3887     all_info = lu.cfg.GetAllNodesInfo()
3888
3889     nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
3890
3891     # Gather data as requested
3892     if query.NQ_LIVE in self.requested_data:
3893       # filter out non-vm_capable nodes
3894       toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
3895
3896       node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
3897                                         lu.cfg.GetHypervisorType())
3898       live_data = dict((name, nresult.payload)
3899                        for (name, nresult) in node_data.items()
3900                        if not nresult.fail_msg and nresult.payload)
3901     else:
3902       live_data = None
3903
3904     if query.NQ_INST in self.requested_data:
3905       node_to_primary = dict([(name, set()) for name in nodenames])
3906       node_to_secondary = dict([(name, set()) for name in nodenames])
3907
3908       inst_data = lu.cfg.GetAllInstancesInfo()
3909
3910       for inst in inst_data.values():
3911         if inst.primary_node in node_to_primary:
3912           node_to_primary[inst.primary_node].add(inst.name)
3913         for secnode in inst.secondary_nodes:
3914           if secnode in node_to_secondary:
3915             node_to_secondary[secnode].add(inst.name)
3916     else:
3917       node_to_primary = None
3918       node_to_secondary = None
3919
3920     if query.NQ_OOB in self.requested_data:
3921       oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
3922                          for name, node in all_info.iteritems())
3923     else:
3924       oob_support = None
3925
3926     if query.NQ_GROUP in self.requested_data:
3927       groups = lu.cfg.GetAllNodeGroupsInfo()
3928     else:
3929       groups = {}
3930
3931     return query.NodeQueryData([all_info[name] for name in nodenames],
3932                                live_data, lu.cfg.GetMasterNode(),
3933                                node_to_primary, node_to_secondary, groups,
3934                                oob_support, lu.cfg.GetClusterInfo())
3935
3936
3937 class LUNodeQuery(NoHooksLU):
3938   """Logical unit for querying nodes.
3939
3940   """
3941   # pylint: disable-msg=W0142
3942   REQ_BGL = False
3943
3944   def CheckArguments(self):
3945     self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
3946                          self.op.output_fields, self.op.use_locking)
3947
3948   def ExpandNames(self):
3949     self.nq.ExpandNames(self)
3950
3951   def Exec(self, feedback_fn):
3952     return self.nq.OldStyleQuery(self)
3953
3954
3955 class LUNodeQueryvols(NoHooksLU):
3956   """Logical unit for getting volumes on node(s).
3957
3958   """
3959   REQ_BGL = False
3960   _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3961   _FIELDS_STATIC = utils.FieldSet("node")
3962
3963   def CheckArguments(self):
3964     _CheckOutputFields(static=self._FIELDS_STATIC,
3965                        dynamic=self._FIELDS_DYNAMIC,
3966                        selected=self.op.output_fields)
3967
3968   def ExpandNames(self):
3969     self.needed_locks = {}
3970     self.share_locks[locking.LEVEL_NODE] = 1
3971     if not self.op.nodes:
3972       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3973     else:
3974       self.needed_locks[locking.LEVEL_NODE] = \
3975         _GetWantedNodes(self, self.op.nodes)
3976
3977   def Exec(self, feedback_fn):
3978     """Computes the list of nodes and their attributes.
3979
3980     """
3981     nodenames = self.glm.list_owned(locking.LEVEL_NODE)
3982     volumes = self.rpc.call_node_volumes(nodenames)
3983
3984     ilist = [self.cfg.GetInstanceInfo(iname) for iname
3985              in self.cfg.GetInstanceList()]
3986
3987     lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3988
3989     output = []
3990     for node in nodenames:
3991       nresult = volumes[node]
3992       if nresult.offline:
3993         continue
3994       msg = nresult.fail_msg
3995       if msg:
3996         self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3997         continue
3998
3999       node_vols = nresult.payload[:]
4000       node_vols.sort(key=lambda vol: vol['dev'])
4001
4002       for vol in node_vols:
4003         node_output = []
4004         for field in self.op.output_fields:
4005           if field == "node":
4006             val = node
4007           elif field == "phys":
4008             val = vol['dev']
4009           elif field == "vg":
4010             val = vol['vg']
4011           elif field == "name":
4012             val = vol['name']
4013           elif field == "size":
4014             val = int(float(vol['size']))
4015           elif field == "instance":
4016             for inst in ilist:
4017               if node not in lv_by_node[inst]:
4018                 continue
4019               if vol['name'] in lv_by_node[inst][node]:
4020                 val = inst.name
4021                 break
4022             else:
4023               val = '-'
4024           else:
4025             raise errors.ParameterError(field)
4026           node_output.append(str(val))
4027
4028         output.append(node_output)
4029
4030     return output
4031
4032
4033 class LUNodeQueryStorage(NoHooksLU):
4034   """Logical unit for getting information on storage units on node(s).
4035
4036   """
4037   _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4038   REQ_BGL = False
4039
4040   def CheckArguments(self):
4041     _CheckOutputFields(static=self._FIELDS_STATIC,
4042                        dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4043                        selected=self.op.output_fields)
4044
4045   def ExpandNames(self):
4046     self.needed_locks = {}
4047     self.share_locks[locking.LEVEL_NODE] = 1
4048
4049     if self.op.nodes:
4050       self.needed_locks[locking.LEVEL_NODE] = \
4051         _GetWantedNodes(self, self.op.nodes)
4052     else:
4053       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4054
4055   def Exec(self, feedback_fn):
4056     """Computes the list of nodes and their attributes.
4057
4058     """
4059     self.nodes = self.glm.list_owned(locking.LEVEL_NODE)
4060
4061     # Always get name to sort by
4062     if constants.SF_NAME in self.op.output_fields:
4063       fields = self.op.output_fields[:]
4064     else:
4065       fields = [constants.SF_NAME] + self.op.output_fields
4066
4067     # Never ask for node or type as it's only known to the LU
4068     for extra in [constants.SF_NODE, constants.SF_TYPE]:
4069       while extra in fields:
4070         fields.remove(extra)
4071
4072     field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4073     name_idx = field_idx[constants.SF_NAME]
4074
4075     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4076     data = self.rpc.call_storage_list(self.nodes,
4077                                       self.op.storage_type, st_args,
4078                                       self.op.name, fields)
4079
4080     result = []
4081
4082     for node in utils.NiceSort(self.nodes):
4083       nresult = data[node]
4084       if nresult.offline:
4085         continue
4086
4087       msg = nresult.fail_msg
4088       if msg:
4089         self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4090         continue
4091
4092       rows = dict([(row[name_idx], row) for row in nresult.payload])
4093
4094       for name in utils.NiceSort(rows.keys()):
4095         row = rows[name]
4096
4097         out = []
4098
4099         for field in self.op.output_fields:
4100           if field == constants.SF_NODE:
4101             val = node
4102           elif field == constants.SF_TYPE:
4103             val = self.op.storage_type
4104           elif field in field_idx:
4105             val = row[field_idx[field]]
4106           else:
4107             raise errors.ParameterError(field)
4108
4109           out.append(val)
4110
4111         result.append(out)
4112
4113     return result
4114
4115
4116 class _InstanceQuery(_QueryBase):
4117   FIELDS = query.INSTANCE_FIELDS
4118
4119   def ExpandNames(self, lu):
4120     lu.needed_locks = {}
4121     lu.share_locks[locking.LEVEL_INSTANCE] = 1
4122     lu.share_locks[locking.LEVEL_NODE] = 1
4123
4124     if self.names:
4125       self.wanted = _GetWantedInstances(lu, self.names)
4126     else:
4127       self.wanted = locking.ALL_SET
4128
4129     self.do_locking = (self.use_locking and
4130                        query.IQ_LIVE in self.requested_data)
4131     if self.do_locking:
4132       lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4133       lu.needed_locks[locking.LEVEL_NODE] = []
4134       lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4135
4136   def DeclareLocks(self, lu, level):
4137     if level == locking.LEVEL_NODE and self.do_locking:
4138       lu._LockInstancesNodes() # pylint: disable-msg=W0212
4139
4140   def _GetQueryData(self, lu):
4141     """Computes the list of instances and their attributes.
4142
4143     """
4144     cluster = lu.cfg.GetClusterInfo()
4145     all_info = lu.cfg.GetAllInstancesInfo()
4146
4147     instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4148
4149     instance_list = [all_info[name] for name in instance_names]
4150     nodes = frozenset(itertools.chain(*(inst.all_nodes
4151                                         for inst in instance_list)))
4152     hv_list = list(set([inst.hypervisor for inst in instance_list]))
4153     bad_nodes = []
4154     offline_nodes = []
4155     wrongnode_inst = set()
4156
4157     # Gather data as requested
4158     if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4159       live_data = {}
4160       node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4161       for name in nodes:
4162         result = node_data[name]
4163         if result.offline:
4164           # offline nodes will be in both lists
4165           assert result.fail_msg
4166           offline_nodes.append(name)
4167         if result.fail_msg:
4168           bad_nodes.append(name)
4169         elif result.payload:
4170           for inst in result.payload:
4171             if inst in all_info:
4172               if all_info[inst].primary_node == name:
4173                 live_data.update(result.payload)
4174               else:
4175                 wrongnode_inst.add(inst)
4176             else:
4177               # orphan instance; we don't list it here as we don't
4178               # handle this case yet in the output of instance listing
4179               logging.warning("Orphan instance '%s' found on node %s",
4180                               inst, name)
4181         # else no instance is alive
4182     else:
4183       live_data = {}
4184
4185     if query.IQ_DISKUSAGE in self.requested_data:
4186       disk_usage = dict((inst.name,
4187                          _ComputeDiskSize(inst.disk_template,
4188                                           [{constants.IDISK_SIZE: disk.size}
4189                                            for disk in inst.disks]))
4190                         for inst in instance_list)
4191     else:
4192       disk_usage = None
4193
4194     if query.IQ_CONSOLE in self.requested_data:
4195       consinfo = {}
4196       for inst in instance_list:
4197         if inst.name in live_data:
4198           # Instance is running
4199           consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4200         else:
4201           consinfo[inst.name] = None
4202       assert set(consinfo.keys()) == set(instance_names)
4203     else:
4204       consinfo = None
4205
4206     return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4207                                    disk_usage, offline_nodes, bad_nodes,
4208                                    live_data, wrongnode_inst, consinfo)
4209
4210
4211 class LUQuery(NoHooksLU):
4212   """Query for resources/items of a certain kind.
4213
4214   """
4215   # pylint: disable-msg=W0142
4216   REQ_BGL = False
4217
4218   def CheckArguments(self):
4219     qcls = _GetQueryImplementation(self.op.what)
4220
4221     self.impl = qcls(self.op.filter, self.op.fields, False)
4222
4223   def ExpandNames(self):
4224     self.impl.ExpandNames(self)
4225
4226   def DeclareLocks(self, level):
4227     self.impl.DeclareLocks(self, level)
4228
4229   def Exec(self, feedback_fn):
4230     return self.impl.NewStyleQuery(self)
4231
4232
4233 class LUQueryFields(NoHooksLU):
4234   """Query for resources/items of a certain kind.
4235
4236   """
4237   # pylint: disable-msg=W0142
4238   REQ_BGL = False
4239
4240   def CheckArguments(self):
4241     self.qcls = _GetQueryImplementation(self.op.what)
4242
4243   def ExpandNames(self):
4244     self.needed_locks = {}
4245
4246   def Exec(self, feedback_fn):
4247     return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4248
4249
4250 class LUNodeModifyStorage(NoHooksLU):
4251   """Logical unit for modifying a storage volume on a node.
4252
4253   """
4254   REQ_BGL = False
4255
4256   def CheckArguments(self):
4257     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4258
4259     storage_type = self.op.storage_type
4260
4261     try:
4262       modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4263     except KeyError:
4264       raise errors.OpPrereqError("Storage units of type '%s' can not be"
4265                                  " modified" % storage_type,
4266                                  errors.ECODE_INVAL)
4267
4268     diff = set(self.op.changes.keys()) - modifiable
4269     if diff:
4270       raise errors.OpPrereqError("The following fields can not be modified for"
4271                                  " storage units of type '%s': %r" %
4272                                  (storage_type, list(diff)),
4273                                  errors.ECODE_INVAL)
4274
4275   def ExpandNames(self):
4276     self.needed_locks = {
4277       locking.LEVEL_NODE: self.op.node_name,
4278       }
4279
4280   def Exec(self, feedback_fn):
4281     """Computes the list of nodes and their attributes.
4282
4283     """
4284     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4285     result = self.rpc.call_storage_modify(self.op.node_name,
4286                                           self.op.storage_type, st_args,
4287                                           self.op.name, self.op.changes)
4288     result.Raise("Failed to modify storage unit '%s' on %s" %
4289                  (self.op.name, self.op.node_name))
4290
4291
4292 class LUNodeAdd(LogicalUnit):
4293   """Logical unit for adding node to the cluster.
4294
4295   """
4296   HPATH = "node-add"
4297   HTYPE = constants.HTYPE_NODE
4298   _NFLAGS = ["master_capable", "vm_capable"]
4299
4300   def CheckArguments(self):
4301     self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4302     # validate/normalize the node name
4303     self.hostname = netutils.GetHostname(name=self.op.node_name,
4304                                          family=self.primary_ip_family)
4305     self.op.node_name = self.hostname.name
4306
4307     if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4308       raise errors.OpPrereqError("Cannot readd the master node",
4309                                  errors.ECODE_STATE)
4310
4311     if self.op.readd and self.op.group:
4312       raise errors.OpPrereqError("Cannot pass a node group when a node is"
4313                                  " being readded", errors.ECODE_INVAL)
4314
4315   def BuildHooksEnv(self):
4316     """Build hooks env.
4317
4318     This will run on all nodes before, and on all nodes + the new node after.
4319
4320     """
4321     return {
4322       "OP_TARGET": self.op.node_name,
4323       "NODE_NAME": self.op.node_name,
4324       "NODE_PIP": self.op.primary_ip,
4325       "NODE_SIP": self.op.secondary_ip,
4326       "MASTER_CAPABLE": str(self.op.master_capable),
4327       "VM_CAPABLE": str(self.op.vm_capable),
4328       }
4329
4330   def BuildHooksNodes(self):
4331     """Build hooks nodes.
4332
4333     """
4334     # Exclude added node
4335     pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4336     post_nodes = pre_nodes + [self.op.node_name, ]
4337
4338     return (pre_nodes, post_nodes)
4339
4340   def CheckPrereq(self):
4341     """Check prerequisites.
4342
4343     This checks:
4344      - the new node is not already in the config
4345      - it is resolvable
4346      - its parameters (single/dual homed) matches the cluster
4347
4348     Any errors are signaled by raising errors.OpPrereqError.
4349
4350     """
4351     cfg = self.cfg
4352     hostname = self.hostname
4353     node = hostname.name
4354     primary_ip = self.op.primary_ip = hostname.ip
4355     if self.op.secondary_ip is None:
4356       if self.primary_ip_family == netutils.IP6Address.family:
4357         raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4358                                    " IPv4 address must be given as secondary",
4359                                    errors.ECODE_INVAL)
4360       self.op.secondary_ip = primary_ip
4361
4362     secondary_ip = self.op.secondary_ip
4363     if not netutils.IP4Address.IsValid(secondary_ip):
4364       raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4365                                  " address" % secondary_ip, errors.ECODE_INVAL)
4366
4367     node_list = cfg.GetNodeList()
4368     if not self.op.readd and node in node_list:
4369       raise errors.OpPrereqError("Node %s is already in the configuration" %
4370                                  node, errors.ECODE_EXISTS)
4371     elif self.op.readd and node not in node_list:
4372       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4373                                  errors.ECODE_NOENT)
4374
4375     self.changed_primary_ip = False
4376
4377     for existing_node_name in node_list:
4378       existing_node = cfg.GetNodeInfo(existing_node_name)
4379
4380       if self.op.readd and node == existing_node_name:
4381         if existing_node.secondary_ip != secondary_ip:
4382           raise errors.OpPrereqError("Readded node doesn't have the same IP"
4383                                      " address configuration as before",
4384                                      errors.ECODE_INVAL)
4385         if existing_node.primary_ip != primary_ip:
4386           self.changed_primary_ip = True
4387
4388         continue
4389
4390       if (existing_node.primary_ip == primary_ip or
4391           existing_node.secondary_ip == primary_ip or
4392           existing_node.primary_ip == secondary_ip or
4393           existing_node.secondary_ip == secondary_ip):
4394         raise errors.OpPrereqError("New node ip address(es) conflict with"
4395                                    " existing node %s" % existing_node.name,
4396                                    errors.ECODE_NOTUNIQUE)
4397
4398     # After this 'if' block, None is no longer a valid value for the
4399     # _capable op attributes
4400     if self.op.readd:
4401       old_node = self.cfg.GetNodeInfo(node)
4402       assert old_node is not None, "Can't retrieve locked node %s" % node
4403       for attr in self._NFLAGS:
4404         if getattr(self.op, attr) is None:
4405           setattr(self.op, attr, getattr(old_node, attr))
4406     else:
4407       for attr in self._NFLAGS:
4408         if getattr(self.op, attr) is None:
4409           setattr(self.op, attr, True)
4410
4411     if self.op.readd and not self.op.vm_capable:
4412       pri, sec = cfg.GetNodeInstances(node)
4413       if pri or sec:
4414         raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4415                                    " flag set to false, but it already holds"
4416                                    " instances" % node,
4417                                    errors.ECODE_STATE)
4418
4419     # check that the type of the node (single versus dual homed) is the
4420     # same as for the master
4421     myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4422     master_singlehomed = myself.secondary_ip == myself.primary_ip
4423     newbie_singlehomed = secondary_ip == primary_ip
4424     if master_singlehomed != newbie_singlehomed:
4425       if master_singlehomed:
4426         raise errors.OpPrereqError("The master has no secondary ip but the"
4427                                    " new node has one",
4428                                    errors.ECODE_INVAL)
4429       else:
4430         raise errors.OpPrereqError("The master has a secondary ip but the"
4431                                    " new node doesn't have one",
4432                                    errors.ECODE_INVAL)
4433
4434     # checks reachability
4435     if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4436       raise errors.OpPrereqError("Node not reachable by ping",
4437                                  errors.ECODE_ENVIRON)
4438
4439     if not newbie_singlehomed:
4440       # check reachability from my secondary ip to newbie's secondary ip
4441       if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4442                            source=myself.secondary_ip):
4443         raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4444                                    " based ping to node daemon port",
4445                                    errors.ECODE_ENVIRON)
4446
4447     if self.op.readd:
4448       exceptions = [node]
4449     else:
4450       exceptions = []
4451
4452     if self.op.master_capable:
4453       self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4454     else:
4455       self.master_candidate = False
4456
4457     if self.op.readd:
4458       self.new_node = old_node
4459     else:
4460       node_group = cfg.LookupNodeGroup(self.op.group)
4461       self.new_node = objects.Node(name=node,
4462                                    primary_ip=primary_ip,
4463                                    secondary_ip=secondary_ip,
4464                                    master_candidate=self.master_candidate,
4465                                    offline=False, drained=False,
4466                                    group=node_group)
4467
4468     if self.op.ndparams:
4469       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4470
4471   def Exec(self, feedback_fn):
4472     """Adds the new node to the cluster.
4473
4474     """
4475     new_node = self.new_node
4476     node = new_node.name
4477
4478     # We adding a new node so we assume it's powered
4479     new_node.powered = True
4480
4481     # for re-adds, reset the offline/drained/master-candidate flags;
4482     # we need to reset here, otherwise offline would prevent RPC calls
4483     # later in the procedure; this also means that if the re-add
4484     # fails, we are left with a non-offlined, broken node
4485     if self.op.readd:
4486       new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4487       self.LogInfo("Readding a node, the offline/drained flags were reset")
4488       # if we demote the node, we do cleanup later in the procedure
4489       new_node.master_candidate = self.master_candidate
4490       if self.changed_primary_ip:
4491         new_node.primary_ip = self.op.primary_ip
4492
4493     # copy the master/vm_capable flags
4494     for attr in self._NFLAGS:
4495       setattr(new_node, attr, getattr(self.op, attr))
4496
4497     # notify the user about any possible mc promotion
4498     if new_node.master_candidate:
4499       self.LogInfo("Node will be a master candidate")
4500
4501     if self.op.ndparams:
4502       new_node.ndparams = self.op.ndparams
4503     else:
4504       new_node.ndparams = {}
4505
4506     # check connectivity
4507     result = self.rpc.call_version([node])[node]
4508     result.Raise("Can't get version information from node %s" % node)
4509     if constants.PROTOCOL_VERSION == result.payload:
4510       logging.info("Communication to node %s fine, sw version %s match",
4511                    node, result.payload)
4512     else:
4513       raise errors.OpExecError("Version mismatch master version %s,"
4514                                " node version %s" %
4515                                (constants.PROTOCOL_VERSION, result.payload))
4516
4517     # Add node to our /etc/hosts, and add key to known_hosts
4518     if self.cfg.GetClusterInfo().modify_etc_hosts:
4519       master_node = self.cfg.GetMasterNode()
4520       result = self.rpc.call_etc_hosts_modify(master_node,
4521                                               constants.ETC_HOSTS_ADD,
4522                                               self.hostname.name,
4523                                               self.hostname.ip)
4524       result.Raise("Can't update hosts file with new host data")
4525
4526     if new_node.secondary_ip != new_node.primary_ip:
4527       _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4528                                False)
4529
4530     node_verify_list = [self.cfg.GetMasterNode()]
4531     node_verify_param = {
4532       constants.NV_NODELIST: [node],
4533       # TODO: do a node-net-test as well?
4534     }
4535
4536     result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4537                                        self.cfg.GetClusterName())
4538     for verifier in node_verify_list:
4539       result[verifier].Raise("Cannot communicate with node %s" % verifier)
4540       nl_payload = result[verifier].payload[constants.NV_NODELIST]
4541       if nl_payload:
4542         for failed in nl_payload:
4543           feedback_fn("ssh/hostname verification failed"
4544                       " (checking from %s): %s" %
4545                       (verifier, nl_payload[failed]))
4546         raise errors.OpExecError("ssh/hostname verification failed")
4547
4548     if self.op.readd:
4549       _RedistributeAncillaryFiles(self)
4550       self.context.ReaddNode(new_node)
4551       # make sure we redistribute the config
4552       self.cfg.Update(new_node, feedback_fn)
4553       # and make sure the new node will not have old files around
4554       if not new_node.master_candidate:
4555         result = self.rpc.call_node_demote_from_mc(new_node.name)
4556         msg = result.fail_msg
4557         if msg:
4558           self.LogWarning("Node failed to demote itself from master"
4559                           " candidate status: %s" % msg)
4560     else:
4561       _RedistributeAncillaryFiles(self, additional_nodes=[node],
4562                                   additional_vm=self.op.vm_capable)
4563       self.context.AddNode(new_node, self.proc.GetECId())
4564
4565
4566 class LUNodeSetParams(LogicalUnit):
4567   """Modifies the parameters of a node.
4568
4569   @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4570       to the node role (as _ROLE_*)
4571   @cvar _R2F: a dictionary from node role to tuples of flags
4572   @cvar _FLAGS: a list of attribute names corresponding to the flags
4573
4574   """
4575   HPATH = "node-modify"
4576   HTYPE = constants.HTYPE_NODE
4577   REQ_BGL = False
4578   (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
4579   _F2R = {
4580     (True, False, False): _ROLE_CANDIDATE,
4581     (False, True, False): _ROLE_DRAINED,
4582     (False, False, True): _ROLE_OFFLINE,
4583     (False, False, False): _ROLE_REGULAR,
4584     }
4585   _R2F = dict((v, k) for k, v in _F2R.items())
4586   _FLAGS = ["master_candidate", "drained", "offline"]
4587
4588   def CheckArguments(self):
4589     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4590     all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4591                 self.op.master_capable, self.op.vm_capable,
4592                 self.op.secondary_ip, self.op.ndparams]
4593     if all_mods.count(None) == len(all_mods):
4594       raise errors.OpPrereqError("Please pass at least one modification",
4595                                  errors.ECODE_INVAL)
4596     if all_mods.count(True) > 1:
4597       raise errors.OpPrereqError("Can't set the node into more than one"
4598                                  " state at the same time",
4599                                  errors.ECODE_INVAL)
4600
4601     # Boolean value that tells us whether we might be demoting from MC
4602     self.might_demote = (self.op.master_candidate == False or
4603                          self.op.offline == True or
4604                          self.op.drained == True or
4605                          self.op.master_capable == False)
4606
4607     if self.op.secondary_ip:
4608       if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4609         raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4610                                    " address" % self.op.secondary_ip,
4611                                    errors.ECODE_INVAL)
4612
4613     self.lock_all = self.op.auto_promote and self.might_demote
4614     self.lock_instances = self.op.secondary_ip is not None
4615
4616   def ExpandNames(self):
4617     if self.lock_all:
4618       self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4619     else:
4620       self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4621
4622     if self.lock_instances:
4623       self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4624
4625   def DeclareLocks(self, level):
4626     # If we have locked all instances, before waiting to lock nodes, release
4627     # all the ones living on nodes unrelated to the current operation.
4628     if level == locking.LEVEL_NODE and self.lock_instances:
4629       self.affected_instances = []
4630       if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4631         instances_keep = []
4632
4633         # Build list of instances to release
4634         for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
4635           instance = self.context.cfg.GetInstanceInfo(instance_name)
4636           if (instance.disk_template in constants.DTS_INT_MIRROR and
4637               self.op.node_name in instance.all_nodes):
4638             instances_keep.append(instance_name)
4639             self.affected_instances.append(instance)
4640
4641         _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
4642
4643         assert (set(self.glm.list_owned(locking.LEVEL_INSTANCE)) ==
4644                 set(instances_keep))
4645
4646   def BuildHooksEnv(self):
4647     """Build hooks env.
4648
4649     This runs on the master node.
4650
4651     """
4652     return {
4653       "OP_TARGET": self.op.node_name,
4654       "MASTER_CANDIDATE": str(self.op.master_candidate),
4655       "OFFLINE": str(self.op.offline),
4656       "DRAINED": str(self.op.drained),
4657       "MASTER_CAPABLE": str(self.op.master_capable),
4658       "VM_CAPABLE": str(self.op.vm_capable),
4659       }
4660
4661   def BuildHooksNodes(self):
4662     """Build hooks nodes.
4663
4664     """
4665     nl = [self.cfg.GetMasterNode(), self.op.node_name]
4666     return (nl, nl)
4667
4668   def CheckPrereq(self):
4669     """Check prerequisites.
4670
4671     This only checks the instance list against the existing names.
4672
4673     """
4674     node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4675
4676     if (self.op.master_candidate is not None or
4677         self.op.drained is not None or
4678         self.op.offline is not None):
4679       # we can't change the master's node flags
4680       if self.op.node_name == self.cfg.GetMasterNode():
4681         raise errors.OpPrereqError("The master role can be changed"
4682                                    " only via master-failover",
4683                                    errors.ECODE_INVAL)
4684
4685     if self.op.master_candidate and not node.master_capable:
4686       raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4687                                  " it a master candidate" % node.name,
4688                                  errors.ECODE_STATE)
4689
4690     if self.op.vm_capable == False:
4691       (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4692       if ipri or isec:
4693         raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4694                                    " the vm_capable flag" % node.name,
4695                                    errors.ECODE_STATE)
4696
4697     if node.master_candidate and self.might_demote and not self.lock_all:
4698       assert not self.op.auto_promote, "auto_promote set but lock_all not"
4699       # check if after removing the current node, we're missing master
4700       # candidates
4701       (mc_remaining, mc_should, _) = \
4702           self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4703       if mc_remaining < mc_should:
4704         raise errors.OpPrereqError("Not enough master candidates, please"
4705                                    " pass auto promote option to allow"
4706                                    " promotion", errors.ECODE_STATE)
4707
4708     self.old_flags = old_flags = (node.master_candidate,
4709                                   node.drained, node.offline)
4710     assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
4711     self.old_role = old_role = self._F2R[old_flags]
4712
4713     # Check for ineffective changes
4714     for attr in self._FLAGS:
4715       if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4716         self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4717         setattr(self.op, attr, None)
4718
4719     # Past this point, any flag change to False means a transition
4720     # away from the respective state, as only real changes are kept
4721
4722     # TODO: We might query the real power state if it supports OOB
4723     if _SupportsOob(self.cfg, node):
4724       if self.op.offline is False and not (node.powered or
4725                                            self.op.powered == True):
4726         raise errors.OpPrereqError(("Node %s needs to be turned on before its"
4727                                     " offline status can be reset") %
4728                                    self.op.node_name)
4729     elif self.op.powered is not None:
4730       raise errors.OpPrereqError(("Unable to change powered state for node %s"
4731                                   " as it does not support out-of-band"
4732                                   " handling") % self.op.node_name)
4733
4734     # If we're being deofflined/drained, we'll MC ourself if needed
4735     if (self.op.drained == False or self.op.offline == False or
4736         (self.op.master_capable and not node.master_capable)):
4737       if _DecideSelfPromotion(self):
4738         self.op.master_candidate = True
4739         self.LogInfo("Auto-promoting node to master candidate")
4740
4741     # If we're no longer master capable, we'll demote ourselves from MC
4742     if self.op.master_capable == False and node.master_candidate:
4743       self.LogInfo("Demoting from master candidate")
4744       self.op.master_candidate = False
4745
4746     # Compute new role
4747     assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
4748     if self.op.master_candidate:
4749       new_role = self._ROLE_CANDIDATE
4750     elif self.op.drained:
4751       new_role = self._ROLE_DRAINED
4752     elif self.op.offline:
4753       new_role = self._ROLE_OFFLINE
4754     elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
4755       # False is still in new flags, which means we're un-setting (the
4756       # only) True flag
4757       new_role = self._ROLE_REGULAR
4758     else: # no new flags, nothing, keep old role
4759       new_role = old_role
4760
4761     self.new_role = new_role
4762
4763     if old_role == self._ROLE_OFFLINE and new_role != old_role:
4764       # Trying to transition out of offline status
4765       result = self.rpc.call_version([node.name])[node.name]
4766       if result.fail_msg:
4767         raise errors.OpPrereqError("Node %s is being de-offlined but fails"
4768                                    " to report its version: %s" %
4769                                    (node.name, result.fail_msg),
4770                                    errors.ECODE_STATE)
4771       else:
4772         self.LogWarning("Transitioning node from offline to online state"
4773                         " without using re-add. Please make sure the node"
4774                         " is healthy!")
4775
4776     if self.op.secondary_ip:
4777       # Ok even without locking, because this can't be changed by any LU
4778       master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
4779       master_singlehomed = master.secondary_ip == master.primary_ip
4780       if master_singlehomed and self.op.secondary_ip:
4781         raise errors.OpPrereqError("Cannot change the secondary ip on a single"
4782                                    " homed cluster", errors.ECODE_INVAL)
4783
4784       if node.offline:
4785         if self.affected_instances:
4786           raise errors.OpPrereqError("Cannot change secondary ip: offline"
4787                                      " node has instances (%s) configured"
4788                                      " to use it" % self.affected_instances)
4789       else:
4790         # On online nodes, check that no instances are running, and that
4791         # the node has the new ip and we can reach it.
4792         for instance in self.affected_instances:
4793           _CheckInstanceDown(self, instance, "cannot change secondary ip")
4794
4795         _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
4796         if master.name != node.name:
4797           # check reachability from master secondary ip to new secondary ip
4798           if not netutils.TcpPing(self.op.secondary_ip,
4799                                   constants.DEFAULT_NODED_PORT,
4800                                   source=master.secondary_ip):
4801             raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4802                                        " based ping to node daemon port",
4803                                        errors.ECODE_ENVIRON)
4804
4805     if self.op.ndparams:
4806       new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
4807       utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
4808       self.new_ndparams = new_ndparams
4809
4810   def Exec(self, feedback_fn):
4811     """Modifies a node.
4812
4813     """
4814     node = self.node
4815     old_role = self.old_role
4816     new_role = self.new_role
4817
4818     result = []
4819
4820     if self.op.ndparams:
4821       node.ndparams = self.new_ndparams
4822
4823     if self.op.powered is not None:
4824       node.powered = self.op.powered
4825
4826     for attr in ["master_capable", "vm_capable"]:
4827       val = getattr(self.op, attr)
4828       if val is not None:
4829         setattr(node, attr, val)
4830         result.append((attr, str(val)))
4831
4832     if new_role != old_role:
4833       # Tell the node to demote itself, if no longer MC and not offline
4834       if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
4835         msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
4836         if msg:
4837           self.LogWarning("Node failed to demote itself: %s", msg)
4838
4839       new_flags = self._R2F[new_role]
4840       for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
4841         if of != nf:
4842           result.append((desc, str(nf)))
4843       (node.master_candidate, node.drained, node.offline) = new_flags
4844
4845       # we locked all nodes, we adjust the CP before updating this node
4846       if self.lock_all:
4847         _AdjustCandidatePool(self, [node.name])
4848
4849     if self.op.secondary_ip:
4850       node.secondary_ip = self.op.secondary_ip
4851       result.append(("secondary_ip", self.op.secondary_ip))
4852
4853     # this will trigger configuration file update, if needed
4854     self.cfg.Update(node, feedback_fn)
4855
4856     # this will trigger job queue propagation or cleanup if the mc
4857     # flag changed
4858     if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
4859       self.context.ReaddNode(node)
4860
4861     return result
4862
4863
4864 class LUNodePowercycle(NoHooksLU):
4865   """Powercycles a node.
4866
4867   """
4868   REQ_BGL = False
4869
4870   def CheckArguments(self):
4871     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4872     if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4873       raise errors.OpPrereqError("The node is the master and the force"
4874                                  " parameter was not set",
4875                                  errors.ECODE_INVAL)
4876
4877   def ExpandNames(self):
4878     """Locking for PowercycleNode.
4879
4880     This is a last-resort option and shouldn't block on other
4881     jobs. Therefore, we grab no locks.
4882
4883     """
4884     self.needed_locks = {}
4885
4886   def Exec(self, feedback_fn):
4887     """Reboots a node.
4888
4889     """
4890     result = self.rpc.call_node_powercycle(self.op.node_name,
4891                                            self.cfg.GetHypervisorType())
4892     result.Raise("Failed to schedule the reboot")
4893     return result.payload
4894
4895
4896 class LUClusterQuery(NoHooksLU):
4897   """Query cluster configuration.
4898
4899   """
4900   REQ_BGL = False
4901
4902   def ExpandNames(self):
4903     self.needed_locks = {}
4904
4905   def Exec(self, feedback_fn):
4906     """Return cluster config.
4907
4908     """
4909     cluster = self.cfg.GetClusterInfo()
4910     os_hvp = {}
4911
4912     # Filter just for enabled hypervisors
4913     for os_name, hv_dict in cluster.os_hvp.items():
4914       os_hvp[os_name] = {}
4915       for hv_name, hv_params in hv_dict.items():
4916         if hv_name in cluster.enabled_hypervisors:
4917           os_hvp[os_name][hv_name] = hv_params
4918
4919     # Convert ip_family to ip_version
4920     primary_ip_version = constants.IP4_VERSION
4921     if cluster.primary_ip_family == netutils.IP6Address.family:
4922       primary_ip_version = constants.IP6_VERSION
4923
4924     result = {
4925       "software_version": constants.RELEASE_VERSION,
4926       "protocol_version": constants.PROTOCOL_VERSION,
4927       "config_version": constants.CONFIG_VERSION,
4928       "os_api_version": max(constants.OS_API_VERSIONS),
4929       "export_version": constants.EXPORT_VERSION,
4930       "architecture": (platform.architecture()[0], platform.machine()),
4931       "name": cluster.cluster_name,
4932       "master": cluster.master_node,
4933       "default_hypervisor": cluster.enabled_hypervisors[0],
4934       "enabled_hypervisors": cluster.enabled_hypervisors,
4935       "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4936                         for hypervisor_name in cluster.enabled_hypervisors]),
4937       "os_hvp": os_hvp,
4938       "beparams": cluster.beparams,
4939       "osparams": cluster.osparams,
4940       "nicparams": cluster.nicparams,
4941       "ndparams": cluster.ndparams,
4942       "candidate_pool_size": cluster.candidate_pool_size,
4943       "master_netdev": cluster.master_netdev,
4944       "volume_group_name": cluster.volume_group_name,
4945       "drbd_usermode_helper": cluster.drbd_usermode_helper,
4946       "file_storage_dir": cluster.file_storage_dir,
4947       "shared_file_storage_dir": cluster.shared_file_storage_dir,
4948       "maintain_node_health": cluster.maintain_node_health,
4949       "ctime": cluster.ctime,
4950       "mtime": cluster.mtime,
4951       "uuid": cluster.uuid,
4952       "tags": list(cluster.GetTags()),
4953       "uid_pool": cluster.uid_pool,
4954       "default_iallocator": cluster.default_iallocator,
4955       "reserved_lvs": cluster.reserved_lvs,
4956       "primary_ip_version": primary_ip_version,
4957       "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
4958       "hidden_os": cluster.hidden_os,
4959       "blacklisted_os": cluster.blacklisted_os,
4960       }
4961
4962     return result
4963
4964
4965 class LUClusterConfigQuery(NoHooksLU):
4966   """Return configuration values.
4967
4968   """
4969   REQ_BGL = False
4970   _FIELDS_DYNAMIC = utils.FieldSet()
4971   _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4972                                   "watcher_pause", "volume_group_name")
4973
4974   def CheckArguments(self):
4975     _CheckOutputFields(static=self._FIELDS_STATIC,
4976                        dynamic=self._FIELDS_DYNAMIC,
4977                        selected=self.op.output_fields)
4978
4979   def ExpandNames(self):
4980     self.needed_locks = {}
4981
4982   def Exec(self, feedback_fn):
4983     """Dump a representation of the cluster config to the standard output.
4984
4985     """
4986     values = []
4987     for field in self.op.output_fields:
4988       if field == "cluster_name":
4989         entry = self.cfg.GetClusterName()
4990       elif field == "master_node":
4991         entry = self.cfg.GetMasterNode()
4992       elif field == "drain_flag":
4993         entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4994       elif field == "watcher_pause":
4995         entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4996       elif field == "volume_group_name":
4997         entry = self.cfg.GetVGName()
4998       else:
4999         raise errors.ParameterError(field)
5000       values.append(entry)
5001     return values
5002
5003
5004 class LUInstanceActivateDisks(NoHooksLU):
5005   """Bring up an instance's disks.
5006
5007   """
5008   REQ_BGL = False
5009
5010   def ExpandNames(self):
5011     self._ExpandAndLockInstance()
5012     self.needed_locks[locking.LEVEL_NODE] = []
5013     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5014
5015   def DeclareLocks(self, level):
5016     if level == locking.LEVEL_NODE:
5017       self._LockInstancesNodes()
5018
5019   def CheckPrereq(self):
5020     """Check prerequisites.
5021
5022     This checks that the instance is in the cluster.
5023
5024     """
5025     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5026     assert self.instance is not None, \
5027       "Cannot retrieve locked instance %s" % self.op.instance_name
5028     _CheckNodeOnline(self, self.instance.primary_node)
5029
5030   def Exec(self, feedback_fn):
5031     """Activate the disks.
5032
5033     """
5034     disks_ok, disks_info = \
5035               _AssembleInstanceDisks(self, self.instance,
5036                                      ignore_size=self.op.ignore_size)
5037     if not disks_ok:
5038       raise errors.OpExecError("Cannot activate block devices")
5039
5040     return disks_info
5041
5042
5043 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5044                            ignore_size=False):
5045   """Prepare the block devices for an instance.
5046
5047   This sets up the block devices on all nodes.
5048
5049   @type lu: L{LogicalUnit}
5050   @param lu: the logical unit on whose behalf we execute
5051   @type instance: L{objects.Instance}
5052   @param instance: the instance for whose disks we assemble
5053   @type disks: list of L{objects.Disk} or None
5054   @param disks: which disks to assemble (or all, if None)
5055   @type ignore_secondaries: boolean
5056   @param ignore_secondaries: if true, errors on secondary nodes
5057       won't result in an error return from the function
5058   @type ignore_size: boolean
5059   @param ignore_size: if true, the current known size of the disk
5060       will not be used during the disk activation, useful for cases
5061       when the size is wrong
5062   @return: False if the operation failed, otherwise a list of
5063       (host, instance_visible_name, node_visible_name)
5064       with the mapping from node devices to instance devices
5065
5066   """
5067   device_info = []
5068   disks_ok = True
5069   iname = instance.name
5070   disks = _ExpandCheckDisks(instance, disks)
5071
5072   # With the two passes mechanism we try to reduce the window of
5073   # opportunity for the race condition of switching DRBD to primary
5074   # before handshaking occured, but we do not eliminate it
5075
5076   # The proper fix would be to wait (with some limits) until the
5077   # connection has been made and drbd transitions from WFConnection
5078   # into any other network-connected state (Connected, SyncTarget,
5079   # SyncSource, etc.)
5080
5081   # 1st pass, assemble on all nodes in secondary mode
5082   for idx, inst_disk in enumerate(disks):
5083     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5084       if ignore_size:
5085         node_disk = node_disk.Copy()
5086         node_disk.UnsetSize()
5087       lu.cfg.SetDiskID(node_disk, node)
5088       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5089       msg = result.fail_msg
5090       if msg:
5091         lu.proc.LogWarning("Could not prepare block device %s on node %s"
5092                            " (is_primary=False, pass=1): %s",
5093                            inst_disk.iv_name, node, msg)
5094         if not ignore_secondaries:
5095           disks_ok = False
5096
5097   # FIXME: race condition on drbd migration to primary
5098
5099   # 2nd pass, do only the primary node
5100   for idx, inst_disk in enumerate(disks):
5101     dev_path = None
5102
5103     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5104       if node != instance.primary_node:
5105         continue
5106       if ignore_size:
5107         node_disk = node_disk.Copy()
5108         node_disk.UnsetSize()
5109       lu.cfg.SetDiskID(node_disk, node)
5110       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5111       msg = result.fail_msg
5112       if msg:
5113         lu.proc.LogWarning("Could not prepare block device %s on node %s"
5114                            " (is_primary=True, pass=2): %s",
5115                            inst_disk.iv_name, node, msg)
5116         disks_ok = False
5117       else:
5118         dev_path = result.payload
5119
5120     device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5121
5122   # leave the disks configured for the primary node
5123   # this is a workaround that would be fixed better by
5124   # improving the logical/physical id handling
5125   for disk in disks:
5126     lu.cfg.SetDiskID(disk, instance.primary_node)
5127
5128   return disks_ok, device_info
5129
5130
5131 def _StartInstanceDisks(lu, instance, force):
5132   """Start the disks of an instance.
5133
5134   """
5135   disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5136                                            ignore_secondaries=force)
5137   if not disks_ok:
5138     _ShutdownInstanceDisks(lu, instance)
5139     if force is not None and not force:
5140       lu.proc.LogWarning("", hint="If the message above refers to a"
5141                          " secondary node,"
5142                          " you can retry the operation using '--force'.")
5143     raise errors.OpExecError("Disk consistency error")
5144
5145
5146 class LUInstanceDeactivateDisks(NoHooksLU):
5147   """Shutdown an instance's disks.
5148
5149   """
5150   REQ_BGL = False
5151
5152   def ExpandNames(self):
5153     self._ExpandAndLockInstance()
5154     self.needed_locks[locking.LEVEL_NODE] = []
5155     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5156
5157   def DeclareLocks(self, level):
5158     if level == locking.LEVEL_NODE:
5159       self._LockInstancesNodes()
5160
5161   def CheckPrereq(self):
5162     """Check prerequisites.
5163
5164     This checks that the instance is in the cluster.
5165
5166     """
5167     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5168     assert self.instance is not None, \
5169       "Cannot retrieve locked instance %s" % self.op.instance_name
5170
5171   def Exec(self, feedback_fn):
5172     """Deactivate the disks
5173
5174     """
5175     instance = self.instance
5176     if self.op.force:
5177       _ShutdownInstanceDisks(self, instance)
5178     else:
5179       _SafeShutdownInstanceDisks(self, instance)
5180
5181
5182 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5183   """Shutdown block devices of an instance.
5184
5185   This function checks if an instance is running, before calling
5186   _ShutdownInstanceDisks.
5187
5188   """
5189   _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5190   _ShutdownInstanceDisks(lu, instance, disks=disks)
5191
5192
5193 def _ExpandCheckDisks(instance, disks):
5194   """Return the instance disks selected by the disks list
5195
5196   @type disks: list of L{objects.Disk} or None
5197   @param disks: selected disks
5198   @rtype: list of L{objects.Disk}
5199   @return: selected instance disks to act on
5200
5201   """
5202   if disks is None:
5203     return instance.disks
5204   else:
5205     if not set(disks).issubset(instance.disks):
5206       raise errors.ProgrammerError("Can only act on disks belonging to the"
5207                                    " target instance")
5208     return disks
5209
5210
5211 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5212   """Shutdown block devices of an instance.
5213
5214   This does the shutdown on all nodes of the instance.
5215
5216   If the ignore_primary is false, errors on the primary node are
5217   ignored.
5218
5219   """
5220   all_result = True
5221   disks = _ExpandCheckDisks(instance, disks)
5222
5223   for disk in disks:
5224     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5225       lu.cfg.SetDiskID(top_disk, node)
5226       result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5227       msg = result.fail_msg
5228       if msg:
5229         lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5230                       disk.iv_name, node, msg)
5231         if ((node == instance.primary_node and not ignore_primary) or
5232             (node != instance.primary_node and not result.offline)):
5233           all_result = False
5234   return all_result
5235
5236
5237 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5238   """Checks if a node has enough free memory.
5239
5240   This function check if a given node has the needed amount of free
5241   memory. In case the node has less memory or we cannot get the
5242   information from the node, this function raise an OpPrereqError
5243   exception.
5244
5245   @type lu: C{LogicalUnit}
5246   @param lu: a logical unit from which we get configuration data
5247   @type node: C{str}
5248   @param node: the node to check
5249   @type reason: C{str}
5250   @param reason: string to use in the error message
5251   @type requested: C{int}
5252   @param requested: the amount of memory in MiB to check for
5253   @type hypervisor_name: C{str}
5254   @param hypervisor_name: the hypervisor to ask for memory stats
5255   @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5256       we cannot check the node
5257
5258   """
5259   nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5260   nodeinfo[node].Raise("Can't get data from node %s" % node,
5261                        prereq=True, ecode=errors.ECODE_ENVIRON)
5262   free_mem = nodeinfo[node].payload.get('memory_free', None)
5263   if not isinstance(free_mem, int):
5264     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5265                                " was '%s'" % (node, free_mem),
5266                                errors.ECODE_ENVIRON)
5267   if requested > free_mem:
5268     raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5269                                " needed %s MiB, available %s MiB" %
5270                                (node, reason, requested, free_mem),
5271                                errors.ECODE_NORES)
5272
5273
5274 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5275   """Checks if nodes have enough free disk space in the all VGs.
5276
5277   This function check if all given nodes have the needed amount of
5278   free disk. In case any node has less disk or we cannot get the
5279   information from the node, this function raise an OpPrereqError
5280   exception.
5281
5282   @type lu: C{LogicalUnit}
5283   @param lu: a logical unit from which we get configuration data
5284   @type nodenames: C{list}
5285   @param nodenames: the list of node names to check
5286   @type req_sizes: C{dict}
5287   @param req_sizes: the hash of vg and corresponding amount of disk in
5288       MiB to check for
5289   @raise errors.OpPrereqError: if the node doesn't have enough disk,
5290       or we cannot check the node
5291
5292   """
5293   for vg, req_size in req_sizes.items():
5294     _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
5295
5296
5297 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5298   """Checks if nodes have enough free disk space in the specified VG.
5299
5300   This function check if all given nodes have the needed amount of
5301   free disk. In case any node has less disk or we cannot get the
5302   information from the node, this function raise an OpPrereqError
5303   exception.
5304
5305   @type lu: C{LogicalUnit}
5306   @param lu: a logical unit from which we get configuration data
5307   @type nodenames: C{list}
5308   @param nodenames: the list of node names to check
5309   @type vg: C{str}
5310   @param vg: the volume group to check
5311   @type requested: C{int}
5312   @param requested: the amount of disk in MiB to check for
5313   @raise errors.OpPrereqError: if the node doesn't have enough disk,
5314       or we cannot check the node
5315
5316   """
5317   nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5318   for node in nodenames:
5319     info = nodeinfo[node]
5320     info.Raise("Cannot get current information from node %s" % node,
5321                prereq=True, ecode=errors.ECODE_ENVIRON)
5322     vg_free = info.payload.get("vg_free", None)
5323     if not isinstance(vg_free, int):
5324       raise errors.OpPrereqError("Can't compute free disk space on node"
5325                                  " %s for vg %s, result was '%s'" %
5326                                  (node, vg, vg_free), errors.ECODE_ENVIRON)
5327     if requested > vg_free:
5328       raise errors.OpPrereqError("Not enough disk space on target node %s"
5329                                  " vg %s: required %d MiB, available %d MiB" %
5330                                  (node, vg, requested, vg_free),
5331                                  errors.ECODE_NORES)
5332
5333
5334 class LUInstanceStartup(LogicalUnit):
5335   """Starts an instance.
5336
5337   """
5338   HPATH = "instance-start"
5339   HTYPE = constants.HTYPE_INSTANCE
5340   REQ_BGL = False
5341
5342   def CheckArguments(self):
5343     # extra beparams
5344     if self.op.beparams:
5345       # fill the beparams dict
5346       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5347
5348   def ExpandNames(self):
5349     self._ExpandAndLockInstance()
5350
5351   def BuildHooksEnv(self):
5352     """Build hooks env.
5353
5354     This runs on master, primary and secondary nodes of the instance.
5355
5356     """
5357     env = {
5358       "FORCE": self.op.force,
5359       }
5360
5361     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5362
5363     return env
5364
5365   def BuildHooksNodes(self):
5366     """Build hooks nodes.
5367
5368     """
5369     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5370     return (nl, nl)
5371
5372   def CheckPrereq(self):
5373     """Check prerequisites.
5374
5375     This checks that the instance is in the cluster.
5376
5377     """
5378     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5379     assert self.instance is not None, \
5380       "Cannot retrieve locked instance %s" % self.op.instance_name
5381
5382     # extra hvparams
5383     if self.op.hvparams:
5384       # check hypervisor parameter syntax (locally)
5385       cluster = self.cfg.GetClusterInfo()
5386       utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5387       filled_hvp = cluster.FillHV(instance)
5388       filled_hvp.update(self.op.hvparams)
5389       hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5390       hv_type.CheckParameterSyntax(filled_hvp)
5391       _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5392
5393     self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5394
5395     if self.primary_offline and self.op.ignore_offline_nodes:
5396       self.proc.LogWarning("Ignoring offline primary node")
5397
5398       if self.op.hvparams or self.op.beparams:
5399         self.proc.LogWarning("Overridden parameters are ignored")
5400     else:
5401       _CheckNodeOnline(self, instance.primary_node)
5402
5403       bep = self.cfg.GetClusterInfo().FillBE(instance)
5404
5405       # check bridges existence
5406       _CheckInstanceBridgesExist(self, instance)
5407
5408       remote_info = self.rpc.call_instance_info(instance.primary_node,
5409                                                 instance.name,
5410                                                 instance.hypervisor)
5411       remote_info.Raise("Error checking node %s" % instance.primary_node,
5412                         prereq=True, ecode=errors.ECODE_ENVIRON)
5413       if not remote_info.payload: # not running already
5414         _CheckNodeFreeMemory(self, instance.primary_node,
5415                              "starting instance %s" % instance.name,
5416                              bep[constants.BE_MEMORY], instance.hypervisor)
5417
5418   def Exec(self, feedback_fn):
5419     """Start the instance.
5420
5421     """
5422     instance = self.instance
5423     force = self.op.force
5424
5425     self.cfg.MarkInstanceUp(instance.name)
5426
5427     if self.primary_offline:
5428       assert self.op.ignore_offline_nodes
5429       self.proc.LogInfo("Primary node offline, marked instance as started")
5430     else:
5431       node_current = instance.primary_node
5432
5433       _StartInstanceDisks(self, instance, force)
5434
5435       result = self.rpc.call_instance_start(node_current, instance,
5436                                             self.op.hvparams, self.op.beparams)
5437       msg = result.fail_msg
5438       if msg:
5439         _ShutdownInstanceDisks(self, instance)
5440         raise errors.OpExecError("Could not start instance: %s" % msg)
5441
5442
5443 class LUInstanceReboot(LogicalUnit):
5444   """Reboot an instance.
5445
5446   """
5447   HPATH = "instance-reboot"
5448   HTYPE = constants.HTYPE_INSTANCE
5449   REQ_BGL = False
5450
5451   def ExpandNames(self):
5452     self._ExpandAndLockInstance()
5453
5454   def BuildHooksEnv(self):
5455     """Build hooks env.
5456
5457     This runs on master, primary and secondary nodes of the instance.
5458
5459     """
5460     env = {
5461       "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5462       "REBOOT_TYPE": self.op.reboot_type,
5463       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5464       }
5465
5466     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5467
5468     return env
5469
5470   def BuildHooksNodes(self):
5471     """Build hooks nodes.
5472
5473     """
5474     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5475     return (nl, nl)
5476
5477   def CheckPrereq(self):
5478     """Check prerequisites.
5479
5480     This checks that the instance is in the cluster.
5481
5482     """
5483     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5484     assert self.instance is not None, \
5485       "Cannot retrieve locked instance %s" % self.op.instance_name
5486
5487     _CheckNodeOnline(self, instance.primary_node)
5488
5489     # check bridges existence
5490     _CheckInstanceBridgesExist(self, instance)
5491
5492   def Exec(self, feedback_fn):
5493     """Reboot the instance.
5494
5495     """
5496     instance = self.instance
5497     ignore_secondaries = self.op.ignore_secondaries
5498     reboot_type = self.op.reboot_type
5499
5500     remote_info = self.rpc.call_instance_info(instance.primary_node,
5501                                               instance.name,
5502                                               instance.hypervisor)
5503     remote_info.Raise("Error checking node %s" % instance.primary_node)
5504     instance_running = bool(remote_info.payload)
5505
5506     node_current = instance.primary_node
5507
5508     if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5509                                             constants.INSTANCE_REBOOT_HARD]:
5510       for disk in instance.disks:
5511         self.cfg.SetDiskID(disk, node_current)
5512       result = self.rpc.call_instance_reboot(node_current, instance,
5513                                              reboot_type,
5514                                              self.op.shutdown_timeout)
5515       result.Raise("Could not reboot instance")
5516     else:
5517       if instance_running:
5518         result = self.rpc.call_instance_shutdown(node_current, instance,
5519                                                  self.op.shutdown_timeout)
5520         result.Raise("Could not shutdown instance for full reboot")
5521         _ShutdownInstanceDisks(self, instance)
5522       else:
5523         self.LogInfo("Instance %s was already stopped, starting now",
5524                      instance.name)
5525       _StartInstanceDisks(self, instance, ignore_secondaries)
5526       result = self.rpc.call_instance_start(node_current, instance, None, None)
5527       msg = result.fail_msg
5528       if msg:
5529         _ShutdownInstanceDisks(self, instance)
5530         raise errors.OpExecError("Could not start instance for"
5531                                  " full reboot: %s" % msg)
5532
5533     self.cfg.MarkInstanceUp(instance.name)
5534
5535
5536 class LUInstanceShutdown(LogicalUnit):
5537   """Shutdown an instance.
5538
5539   """
5540   HPATH = "instance-stop"
5541   HTYPE = constants.HTYPE_INSTANCE
5542   REQ_BGL = False
5543
5544   def ExpandNames(self):
5545     self._ExpandAndLockInstance()
5546
5547   def BuildHooksEnv(self):
5548     """Build hooks env.
5549
5550     This runs on master, primary and secondary nodes of the instance.
5551
5552     """
5553     env = _BuildInstanceHookEnvByObject(self, self.instance)
5554     env["TIMEOUT"] = self.op.timeout
5555     return env
5556
5557   def BuildHooksNodes(self):
5558     """Build hooks nodes.
5559
5560     """
5561     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5562     return (nl, nl)
5563
5564   def CheckPrereq(self):
5565     """Check prerequisites.
5566
5567     This checks that the instance is in the cluster.
5568
5569     """
5570     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5571     assert self.instance is not None, \
5572       "Cannot retrieve locked instance %s" % self.op.instance_name
5573
5574     self.primary_offline = \
5575       self.cfg.GetNodeInfo(self.instance.primary_node).offline
5576
5577     if self.primary_offline and self.op.ignore_offline_nodes:
5578       self.proc.LogWarning("Ignoring offline primary node")
5579     else:
5580       _CheckNodeOnline(self, self.instance.primary_node)
5581
5582   def Exec(self, feedback_fn):
5583     """Shutdown the instance.
5584
5585     """
5586     instance = self.instance
5587     node_current = instance.primary_node
5588     timeout = self.op.timeout
5589
5590     self.cfg.MarkInstanceDown(instance.name)
5591
5592     if self.primary_offline:
5593       assert self.op.ignore_offline_nodes
5594       self.proc.LogInfo("Primary node offline, marked instance as stopped")
5595     else:
5596       result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5597       msg = result.fail_msg
5598       if msg:
5599         self.proc.LogWarning("Could not shutdown instance: %s" % msg)
5600
5601       _ShutdownInstanceDisks(self, instance)
5602
5603
5604 class LUInstanceReinstall(LogicalUnit):
5605   """Reinstall an instance.
5606
5607   """
5608   HPATH = "instance-reinstall"
5609   HTYPE = constants.HTYPE_INSTANCE
5610   REQ_BGL = False
5611
5612   def ExpandNames(self):
5613     self._ExpandAndLockInstance()
5614
5615   def BuildHooksEnv(self):
5616     """Build hooks env.
5617
5618     This runs on master, primary and secondary nodes of the instance.
5619
5620     """
5621     return _BuildInstanceHookEnvByObject(self, self.instance)
5622
5623   def BuildHooksNodes(self):
5624     """Build hooks nodes.
5625
5626     """
5627     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5628     return (nl, nl)
5629
5630   def CheckPrereq(self):
5631     """Check prerequisites.
5632
5633     This checks that the instance is in the cluster and is not running.
5634
5635     """
5636     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5637     assert instance is not None, \
5638       "Cannot retrieve locked instance %s" % self.op.instance_name
5639     _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5640                      " offline, cannot reinstall")
5641     for node in instance.secondary_nodes:
5642       _CheckNodeOnline(self, node, "Instance secondary node offline,"
5643                        " cannot reinstall")
5644
5645     if instance.disk_template == constants.DT_DISKLESS:
5646       raise errors.OpPrereqError("Instance '%s' has no disks" %
5647                                  self.op.instance_name,
5648                                  errors.ECODE_INVAL)
5649     _CheckInstanceDown(self, instance, "cannot reinstall")
5650
5651     if self.op.os_type is not None:
5652       # OS verification
5653       pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5654       _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5655       instance_os = self.op.os_type
5656     else:
5657       instance_os = instance.os
5658
5659     nodelist = list(instance.all_nodes)
5660
5661     if self.op.osparams:
5662       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5663       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5664       self.os_inst = i_osdict # the new dict (without defaults)
5665     else:
5666       self.os_inst = None
5667
5668     self.instance = instance
5669
5670   def Exec(self, feedback_fn):
5671     """Reinstall the instance.
5672
5673     """
5674     inst = self.instance
5675
5676     if self.op.os_type is not None:
5677       feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5678       inst.os = self.op.os_type
5679       # Write to configuration
5680       self.cfg.Update(inst, feedback_fn)
5681
5682     _StartInstanceDisks(self, inst, None)
5683     try:
5684       feedback_fn("Running the instance OS create scripts...")
5685       # FIXME: pass debug option from opcode to backend
5686       result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5687                                              self.op.debug_level,
5688                                              osparams=self.os_inst)
5689       result.Raise("Could not install OS for instance %s on node %s" %
5690                    (inst.name, inst.primary_node))
5691     finally:
5692       _ShutdownInstanceDisks(self, inst)
5693
5694
5695 class LUInstanceRecreateDisks(LogicalUnit):
5696   """Recreate an instance's missing disks.
5697
5698   """
5699   HPATH = "instance-recreate-disks"
5700   HTYPE = constants.HTYPE_INSTANCE
5701   REQ_BGL = False
5702
5703   def ExpandNames(self):
5704     self._ExpandAndLockInstance()
5705
5706   def BuildHooksEnv(self):
5707     """Build hooks env.
5708
5709     This runs on master, primary and secondary nodes of the instance.
5710
5711     """
5712     return _BuildInstanceHookEnvByObject(self, self.instance)
5713
5714   def BuildHooksNodes(self):
5715     """Build hooks nodes.
5716
5717     """
5718     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5719     return (nl, nl)
5720
5721   def CheckPrereq(self):
5722     """Check prerequisites.
5723
5724     This checks that the instance is in the cluster and is not running.
5725
5726     """
5727     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5728     assert instance is not None, \
5729       "Cannot retrieve locked instance %s" % self.op.instance_name
5730     _CheckNodeOnline(self, instance.primary_node)
5731
5732     if instance.disk_template == constants.DT_DISKLESS:
5733       raise errors.OpPrereqError("Instance '%s' has no disks" %
5734                                  self.op.instance_name, errors.ECODE_INVAL)
5735     _CheckInstanceDown(self, instance, "cannot recreate disks")
5736
5737     if not self.op.disks:
5738       self.op.disks = range(len(instance.disks))
5739     else:
5740       for idx in self.op.disks:
5741         if idx >= len(instance.disks):
5742           raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
5743                                      errors.ECODE_INVAL)
5744
5745     self.instance = instance
5746
5747   def Exec(self, feedback_fn):
5748     """Recreate the disks.
5749
5750     """
5751     to_skip = []
5752     for idx, _ in enumerate(self.instance.disks):
5753       if idx not in self.op.disks: # disk idx has not been passed in
5754         to_skip.append(idx)
5755         continue
5756
5757     _CreateDisks(self, self.instance, to_skip=to_skip)
5758
5759
5760 class LUInstanceRename(LogicalUnit):
5761   """Rename an instance.
5762
5763   """
5764   HPATH = "instance-rename"
5765   HTYPE = constants.HTYPE_INSTANCE
5766
5767   def CheckArguments(self):
5768     """Check arguments.
5769
5770     """
5771     if self.op.ip_check and not self.op.name_check:
5772       # TODO: make the ip check more flexible and not depend on the name check
5773       raise errors.OpPrereqError("IP address check requires a name check",
5774                                  errors.ECODE_INVAL)
5775
5776   def BuildHooksEnv(self):
5777     """Build hooks env.
5778
5779     This runs on master, primary and secondary nodes of the instance.
5780
5781     """
5782     env = _BuildInstanceHookEnvByObject(self, self.instance)
5783     env["INSTANCE_NEW_NAME"] = self.op.new_name
5784     return env
5785
5786   def BuildHooksNodes(self):
5787     """Build hooks nodes.
5788
5789     """
5790     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5791     return (nl, nl)
5792
5793   def CheckPrereq(self):
5794     """Check prerequisites.
5795
5796     This checks that the instance is in the cluster and is not running.
5797
5798     """
5799     self.op.instance_name = _ExpandInstanceName(self.cfg,
5800                                                 self.op.instance_name)
5801     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5802     assert instance is not None
5803     _CheckNodeOnline(self, instance.primary_node)
5804     _CheckInstanceDown(self, instance, "cannot rename")
5805     self.instance = instance
5806
5807     new_name = self.op.new_name
5808     if self.op.name_check:
5809       hostname = netutils.GetHostname(name=new_name)
5810       self.LogInfo("Resolved given name '%s' to '%s'", new_name,
5811                    hostname.name)
5812       if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
5813         raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
5814                                     " same as given hostname '%s'") %
5815                                     (hostname.name, self.op.new_name),
5816                                     errors.ECODE_INVAL)
5817       new_name = self.op.new_name = hostname.name
5818       if (self.op.ip_check and
5819           netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
5820         raise errors.OpPrereqError("IP %s of instance %s already in use" %
5821                                    (hostname.ip, new_name),
5822                                    errors.ECODE_NOTUNIQUE)
5823
5824     instance_list = self.cfg.GetInstanceList()
5825     if new_name in instance_list and new_name != instance.name:
5826       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5827                                  new_name, errors.ECODE_EXISTS)
5828
5829   def Exec(self, feedback_fn):
5830     """Rename the instance.
5831
5832     """
5833     inst = self.instance
5834     old_name = inst.name
5835
5836     rename_file_storage = False
5837     if (inst.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE) and
5838         self.op.new_name != inst.name):
5839       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5840       rename_file_storage = True
5841
5842     self.cfg.RenameInstance(inst.name, self.op.new_name)
5843     # Change the instance lock. This is definitely safe while we hold the BGL.
5844     # Otherwise the new lock would have to be added in acquired mode.
5845     assert self.REQ_BGL
5846     self.glm.remove(locking.LEVEL_INSTANCE, old_name)
5847     self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5848
5849     # re-read the instance from the configuration after rename
5850     inst = self.cfg.GetInstanceInfo(self.op.new_name)
5851
5852     if rename_file_storage:
5853       new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5854       result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5855                                                      old_file_storage_dir,
5856                                                      new_file_storage_dir)
5857       result.Raise("Could not rename on node %s directory '%s' to '%s'"
5858                    " (but the instance has been renamed in Ganeti)" %
5859                    (inst.primary_node, old_file_storage_dir,
5860                     new_file_storage_dir))
5861
5862     _StartInstanceDisks(self, inst, None)
5863     try:
5864       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5865                                                  old_name, self.op.debug_level)
5866       msg = result.fail_msg
5867       if msg:
5868         msg = ("Could not run OS rename script for instance %s on node %s"
5869                " (but the instance has been renamed in Ganeti): %s" %
5870                (inst.name, inst.primary_node, msg))
5871         self.proc.LogWarning(msg)
5872     finally:
5873       _ShutdownInstanceDisks(self, inst)
5874
5875     return inst.name
5876
5877
5878 class LUInstanceRemove(LogicalUnit):
5879   """Remove an instance.
5880
5881   """
5882   HPATH = "instance-remove"
5883   HTYPE = constants.HTYPE_INSTANCE
5884   REQ_BGL = False
5885
5886   def ExpandNames(self):
5887     self._ExpandAndLockInstance()
5888     self.needed_locks[locking.LEVEL_NODE] = []
5889     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5890
5891   def DeclareLocks(self, level):
5892     if level == locking.LEVEL_NODE:
5893       self._LockInstancesNodes()
5894
5895   def BuildHooksEnv(self):
5896     """Build hooks env.
5897
5898     This runs on master, primary and secondary nodes of the instance.
5899
5900     """
5901     env = _BuildInstanceHookEnvByObject(self, self.instance)
5902     env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5903     return env
5904
5905   def BuildHooksNodes(self):
5906     """Build hooks nodes.
5907
5908     """
5909     nl = [self.cfg.GetMasterNode()]
5910     nl_post = list(self.instance.all_nodes) + nl
5911     return (nl, nl_post)
5912
5913   def CheckPrereq(self):
5914     """Check prerequisites.
5915
5916     This checks that the instance is in the cluster.
5917
5918     """
5919     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5920     assert self.instance is not None, \
5921       "Cannot retrieve locked instance %s" % self.op.instance_name
5922
5923   def Exec(self, feedback_fn):
5924     """Remove the instance.
5925
5926     """
5927     instance = self.instance
5928     logging.info("Shutting down instance %s on node %s",
5929                  instance.name, instance.primary_node)
5930
5931     result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5932                                              self.op.shutdown_timeout)
5933     msg = result.fail_msg
5934     if msg:
5935       if self.op.ignore_failures:
5936         feedback_fn("Warning: can't shutdown instance: %s" % msg)
5937       else:
5938         raise errors.OpExecError("Could not shutdown instance %s on"
5939                                  " node %s: %s" %
5940                                  (instance.name, instance.primary_node, msg))
5941
5942     _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5943
5944
5945 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5946   """Utility function to remove an instance.
5947
5948   """
5949   logging.info("Removing block devices for instance %s", instance.name)
5950
5951   if not _RemoveDisks(lu, instance):
5952     if not ignore_failures:
5953       raise errors.OpExecError("Can't remove instance's disks")
5954     feedback_fn("Warning: can't remove instance's disks")
5955
5956   logging.info("Removing instance %s out of cluster config", instance.name)
5957
5958   lu.cfg.RemoveInstance(instance.name)
5959
5960   assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5961     "Instance lock removal conflict"
5962
5963   # Remove lock for the instance
5964   lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5965
5966
5967 class LUInstanceQuery(NoHooksLU):
5968   """Logical unit for querying instances.
5969
5970   """
5971   # pylint: disable-msg=W0142
5972   REQ_BGL = False
5973
5974   def CheckArguments(self):
5975     self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
5976                              self.op.output_fields, self.op.use_locking)
5977
5978   def ExpandNames(self):
5979     self.iq.ExpandNames(self)
5980
5981   def DeclareLocks(self, level):
5982     self.iq.DeclareLocks(self, level)
5983
5984   def Exec(self, feedback_fn):
5985     return self.iq.OldStyleQuery(self)
5986
5987
5988 class LUInstanceFailover(LogicalUnit):
5989   """Failover an instance.
5990
5991   """
5992   HPATH = "instance-failover"
5993   HTYPE = constants.HTYPE_INSTANCE
5994   REQ_BGL = False
5995
5996   def CheckArguments(self):
5997     """Check the arguments.
5998
5999     """
6000     self.iallocator = getattr(self.op, "iallocator", None)
6001     self.target_node = getattr(self.op, "target_node", None)
6002
6003   def ExpandNames(self):
6004     self._ExpandAndLockInstance()
6005
6006     if self.op.target_node is not None:
6007       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6008
6009     self.needed_locks[locking.LEVEL_NODE] = []
6010     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6011
6012     ignore_consistency = self.op.ignore_consistency
6013     shutdown_timeout = self.op.shutdown_timeout
6014     self._migrater = TLMigrateInstance(self, self.op.instance_name,
6015                                        cleanup=False,
6016                                        failover=True,
6017                                        ignore_consistency=ignore_consistency,
6018                                        shutdown_timeout=shutdown_timeout)
6019     self.tasklets = [self._migrater]
6020
6021   def DeclareLocks(self, level):
6022     if level == locking.LEVEL_NODE:
6023       instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6024       if instance.disk_template in constants.DTS_EXT_MIRROR:
6025         if self.op.target_node is None:
6026           self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6027         else:
6028           self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6029                                                    self.op.target_node]
6030         del self.recalculate_locks[locking.LEVEL_NODE]
6031       else:
6032         self._LockInstancesNodes()
6033
6034   def BuildHooksEnv(self):
6035     """Build hooks env.
6036
6037     This runs on master, primary and secondary nodes of the instance.
6038
6039     """
6040     instance = self._migrater.instance
6041     source_node = instance.primary_node
6042     target_node = self.op.target_node
6043     env = {
6044       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6045       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6046       "OLD_PRIMARY": source_node,
6047       "NEW_PRIMARY": target_node,
6048       }
6049
6050     if instance.disk_template in constants.DTS_INT_MIRROR:
6051       env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6052       env["NEW_SECONDARY"] = source_node
6053     else:
6054       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6055
6056     env.update(_BuildInstanceHookEnvByObject(self, instance))
6057
6058     return env
6059
6060   def BuildHooksNodes(self):
6061     """Build hooks nodes.
6062
6063     """
6064     instance = self._migrater.instance
6065     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6066     return (nl, nl + [instance.primary_node])
6067
6068
6069 class LUInstanceMigrate(LogicalUnit):
6070   """Migrate an instance.
6071
6072   This is migration without shutting down, compared to the failover,
6073   which is done with shutdown.
6074
6075   """
6076   HPATH = "instance-migrate"
6077   HTYPE = constants.HTYPE_INSTANCE
6078   REQ_BGL = False
6079
6080   def ExpandNames(self):
6081     self._ExpandAndLockInstance()
6082
6083     if self.op.target_node is not None:
6084       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6085
6086     self.needed_locks[locking.LEVEL_NODE] = []
6087     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6088
6089     self._migrater = TLMigrateInstance(self, self.op.instance_name,
6090                                        cleanup=self.op.cleanup,
6091                                        failover=False,
6092                                        fallback=self.op.allow_failover)
6093     self.tasklets = [self._migrater]
6094
6095   def DeclareLocks(self, level):
6096     if level == locking.LEVEL_NODE:
6097       instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6098       if instance.disk_template in constants.DTS_EXT_MIRROR:
6099         if self.op.target_node is None:
6100           self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6101         else:
6102           self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6103                                                    self.op.target_node]
6104         del self.recalculate_locks[locking.LEVEL_NODE]
6105       else:
6106         self._LockInstancesNodes()
6107
6108   def BuildHooksEnv(self):
6109     """Build hooks env.
6110
6111     This runs on master, primary and secondary nodes of the instance.
6112
6113     """
6114     instance = self._migrater.instance
6115     source_node = instance.primary_node
6116     target_node = self.op.target_node
6117     env = _BuildInstanceHookEnvByObject(self, instance)
6118     env.update({
6119       "MIGRATE_LIVE": self._migrater.live,
6120       "MIGRATE_CLEANUP": self.op.cleanup,
6121       "OLD_PRIMARY": source_node,
6122       "NEW_PRIMARY": target_node,
6123       })
6124
6125     if instance.disk_template in constants.DTS_INT_MIRROR:
6126       env["OLD_SECONDARY"] = target_node
6127       env["NEW_SECONDARY"] = source_node
6128     else:
6129       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6130
6131     return env
6132
6133   def BuildHooksNodes(self):
6134     """Build hooks nodes.
6135
6136     """
6137     instance = self._migrater.instance
6138     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6139     return (nl, nl + [instance.primary_node])
6140
6141
6142 class LUInstanceMove(LogicalUnit):
6143   """Move an instance by data-copying.
6144
6145   """
6146   HPATH = "instance-move"
6147   HTYPE = constants.HTYPE_INSTANCE
6148   REQ_BGL = False
6149
6150   def ExpandNames(self):
6151     self._ExpandAndLockInstance()
6152     target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6153     self.op.target_node = target_node
6154     self.needed_locks[locking.LEVEL_NODE] = [target_node]
6155     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6156
6157   def DeclareLocks(self, level):
6158     if level == locking.LEVEL_NODE:
6159       self._LockInstancesNodes(primary_only=True)
6160
6161   def BuildHooksEnv(self):
6162     """Build hooks env.
6163
6164     This runs on master, primary and secondary nodes of the instance.
6165
6166     """
6167     env = {
6168       "TARGET_NODE": self.op.target_node,
6169       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6170       }
6171     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6172     return env
6173
6174   def BuildHooksNodes(self):
6175     """Build hooks nodes.
6176
6177     """
6178     nl = [
6179       self.cfg.GetMasterNode(),
6180       self.instance.primary_node,
6181       self.op.target_node,
6182       ]
6183     return (nl, nl)
6184
6185   def CheckPrereq(self):
6186     """Check prerequisites.
6187
6188     This checks that the instance is in the cluster.
6189
6190     """
6191     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6192     assert self.instance is not None, \
6193       "Cannot retrieve locked instance %s" % self.op.instance_name
6194
6195     node = self.cfg.GetNodeInfo(self.op.target_node)
6196     assert node is not None, \
6197       "Cannot retrieve locked node %s" % self.op.target_node
6198
6199     self.target_node = target_node = node.name
6200
6201     if target_node == instance.primary_node:
6202       raise errors.OpPrereqError("Instance %s is already on the node %s" %
6203                                  (instance.name, target_node),
6204                                  errors.ECODE_STATE)
6205
6206     bep = self.cfg.GetClusterInfo().FillBE(instance)
6207
6208     for idx, dsk in enumerate(instance.disks):
6209       if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6210         raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6211                                    " cannot copy" % idx, errors.ECODE_STATE)
6212
6213     _CheckNodeOnline(self, target_node)
6214     _CheckNodeNotDrained(self, target_node)
6215     _CheckNodeVmCapable(self, target_node)
6216
6217     if instance.admin_up:
6218       # check memory requirements on the secondary node
6219       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6220                            instance.name, bep[constants.BE_MEMORY],
6221                            instance.hypervisor)
6222     else:
6223       self.LogInfo("Not checking memory on the secondary node as"
6224                    " instance will not be started")
6225
6226     # check bridge existance
6227     _CheckInstanceBridgesExist(self, instance, node=target_node)
6228
6229   def Exec(self, feedback_fn):
6230     """Move an instance.
6231
6232     The move is done by shutting it down on its present node, copying
6233     the data over (slow) and starting it on the new node.
6234
6235     """
6236     instance = self.instance
6237
6238     source_node = instance.primary_node
6239     target_node = self.target_node
6240
6241     self.LogInfo("Shutting down instance %s on source node %s",
6242                  instance.name, source_node)
6243
6244     result = self.rpc.call_instance_shutdown(source_node, instance,
6245                                              self.op.shutdown_timeout)
6246     msg = result.fail_msg
6247     if msg:
6248       if self.op.ignore_consistency:
6249         self.proc.LogWarning("Could not shutdown instance %s on node %s."
6250                              " Proceeding anyway. Please make sure node"
6251                              " %s is down. Error details: %s",
6252                              instance.name, source_node, source_node, msg)
6253       else:
6254         raise errors.OpExecError("Could not shutdown instance %s on"
6255                                  " node %s: %s" %
6256                                  (instance.name, source_node, msg))
6257
6258     # create the target disks
6259     try:
6260       _CreateDisks(self, instance, target_node=target_node)
6261     except errors.OpExecError:
6262       self.LogWarning("Device creation failed, reverting...")
6263       try:
6264         _RemoveDisks(self, instance, target_node=target_node)
6265       finally:
6266         self.cfg.ReleaseDRBDMinors(instance.name)
6267         raise
6268
6269     cluster_name = self.cfg.GetClusterInfo().cluster_name
6270
6271     errs = []
6272     # activate, get path, copy the data over
6273     for idx, disk in enumerate(instance.disks):
6274       self.LogInfo("Copying data for disk %d", idx)
6275       result = self.rpc.call_blockdev_assemble(target_node, disk,
6276                                                instance.name, True, idx)
6277       if result.fail_msg:
6278         self.LogWarning("Can't assemble newly created disk %d: %s",
6279                         idx, result.fail_msg)
6280         errs.append(result.fail_msg)
6281         break
6282       dev_path = result.payload
6283       result = self.rpc.call_blockdev_export(source_node, disk,
6284                                              target_node, dev_path,
6285                                              cluster_name)
6286       if result.fail_msg:
6287         self.LogWarning("Can't copy data over for disk %d: %s",
6288                         idx, result.fail_msg)
6289         errs.append(result.fail_msg)
6290         break
6291
6292     if errs:
6293       self.LogWarning("Some disks failed to copy, aborting")
6294       try:
6295         _RemoveDisks(self, instance, target_node=target_node)
6296       finally:
6297         self.cfg.ReleaseDRBDMinors(instance.name)
6298         raise errors.OpExecError("Errors during disk copy: %s" %
6299                                  (",".join(errs),))
6300
6301     instance.primary_node = target_node
6302     self.cfg.Update(instance, feedback_fn)
6303
6304     self.LogInfo("Removing the disks on the original node")
6305     _RemoveDisks(self, instance, target_node=source_node)
6306
6307     # Only start the instance if it's marked as up
6308     if instance.admin_up:
6309       self.LogInfo("Starting instance %s on node %s",
6310                    instance.name, target_node)
6311
6312       disks_ok, _ = _AssembleInstanceDisks(self, instance,
6313                                            ignore_secondaries=True)
6314       if not disks_ok:
6315         _ShutdownInstanceDisks(self, instance)
6316         raise errors.OpExecError("Can't activate the instance's disks")
6317
6318       result = self.rpc.call_instance_start(target_node, instance, None, None)
6319       msg = result.fail_msg
6320       if msg:
6321         _ShutdownInstanceDisks(self, instance)
6322         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6323                                  (instance.name, target_node, msg))
6324
6325
6326 class LUNodeMigrate(LogicalUnit):
6327   """Migrate all instances from a node.
6328
6329   """
6330   HPATH = "node-migrate"
6331   HTYPE = constants.HTYPE_NODE
6332   REQ_BGL = False
6333
6334   def CheckArguments(self):
6335     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
6336
6337   def ExpandNames(self):
6338     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6339
6340     self.needed_locks = {}
6341
6342     # Create tasklets for migrating instances for all instances on this node
6343     names = []
6344     tasklets = []
6345
6346     self.lock_all_nodes = False
6347
6348     for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
6349       logging.debug("Migrating instance %s", inst.name)
6350       names.append(inst.name)
6351
6352       tasklets.append(TLMigrateInstance(self, inst.name, cleanup=False))
6353
6354       if inst.disk_template in constants.DTS_EXT_MIRROR:
6355         # We need to lock all nodes, as the iallocator will choose the
6356         # destination nodes afterwards
6357         self.lock_all_nodes = True
6358
6359     self.tasklets = tasklets
6360
6361     # Declare node locks
6362     if self.lock_all_nodes:
6363       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6364     else:
6365       self.needed_locks[locking.LEVEL_NODE] = [self.op.node_name]
6366       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6367
6368     # Declare instance locks
6369     self.needed_locks[locking.LEVEL_INSTANCE] = names
6370
6371   def DeclareLocks(self, level):
6372     if level == locking.LEVEL_NODE and not self.lock_all_nodes:
6373       self._LockInstancesNodes()
6374
6375   def BuildHooksEnv(self):
6376     """Build hooks env.
6377
6378     This runs on the master, the primary and all the secondaries.
6379
6380     """
6381     return {
6382       "NODE_NAME": self.op.node_name,
6383       }
6384
6385   def BuildHooksNodes(self):
6386     """Build hooks nodes.
6387
6388     """
6389     nl = [self.cfg.GetMasterNode()]
6390     return (nl, nl)
6391
6392
6393 class TLMigrateInstance(Tasklet):
6394   """Tasklet class for instance migration.
6395
6396   @type live: boolean
6397   @ivar live: whether the migration will be done live or non-live;
6398       this variable is initalized only after CheckPrereq has run
6399   @type cleanup: boolean
6400   @ivar cleanup: Wheater we cleanup from a failed migration
6401   @type iallocator: string
6402   @ivar iallocator: The iallocator used to determine target_node
6403   @type target_node: string
6404   @ivar target_node: If given, the target_node to reallocate the instance to
6405   @type failover: boolean
6406   @ivar failover: Whether operation results in failover or migration
6407   @type fallback: boolean
6408   @ivar fallback: Whether fallback to failover is allowed if migration not
6409                   possible
6410   @type ignore_consistency: boolean
6411   @ivar ignore_consistency: Wheter we should ignore consistency between source
6412                             and target node
6413   @type shutdown_timeout: int
6414   @ivar shutdown_timeout: In case of failover timeout of the shutdown
6415
6416   """
6417   def __init__(self, lu, instance_name, cleanup=False,
6418                failover=False, fallback=False,
6419                ignore_consistency=False,
6420                shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
6421     """Initializes this class.
6422
6423     """
6424     Tasklet.__init__(self, lu)
6425
6426     # Parameters
6427     self.instance_name = instance_name
6428     self.cleanup = cleanup
6429     self.live = False # will be overridden later
6430     self.failover = failover
6431     self.fallback = fallback
6432     self.ignore_consistency = ignore_consistency
6433     self.shutdown_timeout = shutdown_timeout
6434
6435   def CheckPrereq(self):
6436     """Check prerequisites.
6437
6438     This checks that the instance is in the cluster.
6439
6440     """
6441     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6442     instance = self.cfg.GetInstanceInfo(instance_name)
6443     assert instance is not None
6444     self.instance = instance
6445
6446     if (not self.cleanup and not instance.admin_up and not self.failover and
6447         self.fallback):
6448       self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
6449                       " to failover")
6450       self.failover = True
6451
6452     if instance.disk_template not in constants.DTS_MIRRORED:
6453       if self.failover:
6454         text = "failovers"
6455       else:
6456         text = "migrations"
6457       raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
6458                                  " %s" % (instance.disk_template, text),
6459                                  errors.ECODE_STATE)
6460
6461     if instance.disk_template in constants.DTS_EXT_MIRROR:
6462       _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
6463
6464       if self.lu.op.iallocator:
6465         self._RunAllocator()
6466       else:
6467         # We set set self.target_node as it is required by
6468         # BuildHooksEnv
6469         self.target_node = self.lu.op.target_node
6470
6471       # self.target_node is already populated, either directly or by the
6472       # iallocator run
6473       target_node = self.target_node
6474
6475       if len(self.lu.tasklets) == 1:
6476         # It is safe to release locks only when we're the only tasklet
6477         # in the LU
6478         _ReleaseLocks(self.lu, locking.LEVEL_NODE,
6479                       keep=[instance.primary_node, self.target_node])
6480
6481     else:
6482       secondary_nodes = instance.secondary_nodes
6483       if not secondary_nodes:
6484         raise errors.ConfigurationError("No secondary node but using"
6485                                         " %s disk template" %
6486                                         instance.disk_template)
6487       target_node = secondary_nodes[0]
6488       if self.lu.op.iallocator or (self.lu.op.target_node and
6489                                    self.lu.op.target_node != target_node):
6490         if self.failover:
6491           text = "failed over"
6492         else:
6493           text = "migrated"
6494         raise errors.OpPrereqError("Instances with disk template %s cannot"
6495                                    " be %s to arbitrary nodes"
6496                                    " (neither an iallocator nor a target"
6497                                    " node can be passed)" %
6498                                    (instance.disk_template, text),
6499                                    errors.ECODE_INVAL)
6500
6501     i_be = self.cfg.GetClusterInfo().FillBE(instance)
6502
6503     # check memory requirements on the secondary node
6504     if not self.failover or instance.admin_up:
6505       _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6506                            instance.name, i_be[constants.BE_MEMORY],
6507                            instance.hypervisor)
6508     else:
6509       self.lu.LogInfo("Not checking memory on the secondary node as"
6510                       " instance will not be started")
6511
6512     # check bridge existance
6513     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6514
6515     if not self.cleanup:
6516       _CheckNodeNotDrained(self.lu, target_node)
6517       if not self.failover:
6518         result = self.rpc.call_instance_migratable(instance.primary_node,
6519                                                    instance)
6520         if result.fail_msg and self.fallback:
6521           self.lu.LogInfo("Can't migrate, instance offline, fallback to"
6522                           " failover")
6523           self.failover = True
6524         else:
6525           result.Raise("Can't migrate, please use failover",
6526                        prereq=True, ecode=errors.ECODE_STATE)
6527
6528     assert not (self.failover and self.cleanup)
6529
6530     if not self.failover:
6531       if self.lu.op.live is not None and self.lu.op.mode is not None:
6532         raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6533                                    " parameters are accepted",
6534                                    errors.ECODE_INVAL)
6535       if self.lu.op.live is not None:
6536         if self.lu.op.live:
6537           self.lu.op.mode = constants.HT_MIGRATION_LIVE
6538         else:
6539           self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6540         # reset the 'live' parameter to None so that repeated
6541         # invocations of CheckPrereq do not raise an exception
6542         self.lu.op.live = None
6543       elif self.lu.op.mode is None:
6544         # read the default value from the hypervisor
6545         i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
6546                                                 skip_globals=False)
6547         self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6548
6549       self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6550     else:
6551       # Failover is never live
6552       self.live = False
6553
6554   def _RunAllocator(self):
6555     """Run the allocator based on input opcode.
6556
6557     """
6558     ial = IAllocator(self.cfg, self.rpc,
6559                      mode=constants.IALLOCATOR_MODE_RELOC,
6560                      name=self.instance_name,
6561                      # TODO See why hail breaks with a single node below
6562                      relocate_from=[self.instance.primary_node,
6563                                     self.instance.primary_node],
6564                      )
6565
6566     ial.Run(self.lu.op.iallocator)
6567
6568     if not ial.success:
6569       raise errors.OpPrereqError("Can't compute nodes using"
6570                                  " iallocator '%s': %s" %
6571                                  (self.lu.op.iallocator, ial.info),
6572                                  errors.ECODE_NORES)
6573     if len(ial.result) != ial.required_nodes:
6574       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6575                                  " of nodes (%s), required %s" %
6576                                  (self.lu.op.iallocator, len(ial.result),
6577                                   ial.required_nodes), errors.ECODE_FAULT)
6578     self.target_node = ial.result[0]
6579     self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6580                  self.instance_name, self.lu.op.iallocator,
6581                  utils.CommaJoin(ial.result))
6582
6583   def _WaitUntilSync(self):
6584     """Poll with custom rpc for disk sync.
6585
6586     This uses our own step-based rpc call.
6587
6588     """
6589     self.feedback_fn("* wait until resync is done")
6590     all_done = False
6591     while not all_done:
6592       all_done = True
6593       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6594                                             self.nodes_ip,
6595                                             self.instance.disks)
6596       min_percent = 100
6597       for node, nres in result.items():
6598         nres.Raise("Cannot resync disks on node %s" % node)
6599         node_done, node_percent = nres.payload
6600         all_done = all_done and node_done
6601         if node_percent is not None:
6602           min_percent = min(min_percent, node_percent)
6603       if not all_done:
6604         if min_percent < 100:
6605           self.feedback_fn("   - progress: %.1f%%" % min_percent)
6606         time.sleep(2)
6607
6608   def _EnsureSecondary(self, node):
6609     """Demote a node to secondary.
6610
6611     """
6612     self.feedback_fn("* switching node %s to secondary mode" % node)
6613
6614     for dev in self.instance.disks:
6615       self.cfg.SetDiskID(dev, node)
6616
6617     result = self.rpc.call_blockdev_close(node, self.instance.name,
6618                                           self.instance.disks)
6619     result.Raise("Cannot change disk to secondary on node %s" % node)
6620
6621   def _GoStandalone(self):
6622     """Disconnect from the network.
6623
6624     """
6625     self.feedback_fn("* changing into standalone mode")
6626     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6627                                                self.instance.disks)
6628     for node, nres in result.items():
6629       nres.Raise("Cannot disconnect disks node %s" % node)
6630
6631   def _GoReconnect(self, multimaster):
6632     """Reconnect to the network.
6633
6634     """
6635     if multimaster:
6636       msg = "dual-master"
6637     else:
6638       msg = "single-master"
6639     self.feedback_fn("* changing disks into %s mode" % msg)
6640     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6641                                            self.instance.disks,
6642                                            self.instance.name, multimaster)
6643     for node, nres in result.items():
6644       nres.Raise("Cannot change disks config on node %s" % node)
6645
6646   def _ExecCleanup(self):
6647     """Try to cleanup after a failed migration.
6648
6649     The cleanup is done by:
6650       - check that the instance is running only on one node
6651         (and update the config if needed)
6652       - change disks on its secondary node to secondary
6653       - wait until disks are fully synchronized
6654       - disconnect from the network
6655       - change disks into single-master mode
6656       - wait again until disks are fully synchronized
6657
6658     """
6659     instance = self.instance
6660     target_node = self.target_node
6661     source_node = self.source_node
6662
6663     # check running on only one node
6664     self.feedback_fn("* checking where the instance actually runs"
6665                      " (if this hangs, the hypervisor might be in"
6666                      " a bad state)")
6667     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6668     for node, result in ins_l.items():
6669       result.Raise("Can't contact node %s" % node)
6670
6671     runningon_source = instance.name in ins_l[source_node].payload
6672     runningon_target = instance.name in ins_l[target_node].payload
6673
6674     if runningon_source and runningon_target:
6675       raise errors.OpExecError("Instance seems to be running on two nodes,"
6676                                " or the hypervisor is confused; you will have"
6677                                " to ensure manually that it runs only on one"
6678                                " and restart this operation")
6679
6680     if not (runningon_source or runningon_target):
6681       raise errors.OpExecError("Instance does not seem to be running at all;"
6682                                " in this case it's safer to repair by"
6683                                " running 'gnt-instance stop' to ensure disk"
6684                                " shutdown, and then restarting it")
6685
6686     if runningon_target:
6687       # the migration has actually succeeded, we need to update the config
6688       self.feedback_fn("* instance running on secondary node (%s),"
6689                        " updating config" % target_node)
6690       instance.primary_node = target_node
6691       self.cfg.Update(instance, self.feedback_fn)
6692       demoted_node = source_node
6693     else:
6694       self.feedback_fn("* instance confirmed to be running on its"
6695                        " primary node (%s)" % source_node)
6696       demoted_node = target_node
6697
6698     if instance.disk_template in constants.DTS_INT_MIRROR:
6699       self._EnsureSecondary(demoted_node)
6700       try:
6701         self._WaitUntilSync()
6702       except errors.OpExecError:
6703         # we ignore here errors, since if the device is standalone, it
6704         # won't be able to sync
6705         pass
6706       self._GoStandalone()
6707       self._GoReconnect(False)
6708       self._WaitUntilSync()
6709
6710     self.feedback_fn("* done")
6711
6712   def _RevertDiskStatus(self):
6713     """Try to revert the disk status after a failed migration.
6714
6715     """
6716     target_node = self.target_node
6717     if self.instance.disk_template in constants.DTS_EXT_MIRROR:
6718       return
6719
6720     try:
6721       self._EnsureSecondary(target_node)
6722       self._GoStandalone()
6723       self._GoReconnect(False)
6724       self._WaitUntilSync()
6725     except errors.OpExecError, err:
6726       self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
6727                          " please try to recover the instance manually;"
6728                          " error '%s'" % str(err))
6729
6730   def _AbortMigration(self):
6731     """Call the hypervisor code to abort a started migration.
6732
6733     """
6734     instance = self.instance
6735     target_node = self.target_node
6736     migration_info = self.migration_info
6737
6738     abort_result = self.rpc.call_finalize_migration(target_node,
6739                                                     instance,
6740                                                     migration_info,
6741                                                     False)
6742     abort_msg = abort_result.fail_msg
6743     if abort_msg:
6744       logging.error("Aborting migration failed on target node %s: %s",
6745                     target_node, abort_msg)
6746       # Don't raise an exception here, as we stil have to try to revert the
6747       # disk status, even if this step failed.
6748
6749   def _ExecMigration(self):
6750     """Migrate an instance.
6751
6752     The migrate is done by:
6753       - change the disks into dual-master mode
6754       - wait until disks are fully synchronized again
6755       - migrate the instance
6756       - change disks on the new secondary node (the old primary) to secondary
6757       - wait until disks are fully synchronized
6758       - change disks into single-master mode
6759
6760     """
6761     instance = self.instance
6762     target_node = self.target_node
6763     source_node = self.source_node
6764
6765     self.feedback_fn("* checking disk consistency between source and target")
6766     for dev in instance.disks:
6767       if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6768         raise errors.OpExecError("Disk %s is degraded or not fully"
6769                                  " synchronized on target node,"
6770                                  " aborting migration" % dev.iv_name)
6771
6772     # First get the migration information from the remote node
6773     result = self.rpc.call_migration_info(source_node, instance)
6774     msg = result.fail_msg
6775     if msg:
6776       log_err = ("Failed fetching source migration information from %s: %s" %
6777                  (source_node, msg))
6778       logging.error(log_err)
6779       raise errors.OpExecError(log_err)
6780
6781     self.migration_info = migration_info = result.payload
6782
6783     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
6784       # Then switch the disks to master/master mode
6785       self._EnsureSecondary(target_node)
6786       self._GoStandalone()
6787       self._GoReconnect(True)
6788       self._WaitUntilSync()
6789
6790     self.feedback_fn("* preparing %s to accept the instance" % target_node)
6791     result = self.rpc.call_accept_instance(target_node,
6792                                            instance,
6793                                            migration_info,
6794                                            self.nodes_ip[target_node])
6795
6796     msg = result.fail_msg
6797     if msg:
6798       logging.error("Instance pre-migration failed, trying to revert"
6799                     " disk status: %s", msg)
6800       self.feedback_fn("Pre-migration failed, aborting")
6801       self._AbortMigration()
6802       self._RevertDiskStatus()
6803       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6804                                (instance.name, msg))
6805
6806     self.feedback_fn("* migrating instance to %s" % target_node)
6807     result = self.rpc.call_instance_migrate(source_node, instance,
6808                                             self.nodes_ip[target_node],
6809                                             self.live)
6810     msg = result.fail_msg
6811     if msg:
6812       logging.error("Instance migration failed, trying to revert"
6813                     " disk status: %s", msg)
6814       self.feedback_fn("Migration failed, aborting")
6815       self._AbortMigration()
6816       self._RevertDiskStatus()
6817       raise errors.OpExecError("Could not migrate instance %s: %s" %
6818                                (instance.name, msg))
6819
6820     instance.primary_node = target_node
6821     # distribute new instance config to the other nodes
6822     self.cfg.Update(instance, self.feedback_fn)
6823
6824     result = self.rpc.call_finalize_migration(target_node,
6825                                               instance,
6826                                               migration_info,
6827                                               True)
6828     msg = result.fail_msg
6829     if msg:
6830       logging.error("Instance migration succeeded, but finalization failed:"
6831                     " %s", msg)
6832       raise errors.OpExecError("Could not finalize instance migration: %s" %
6833                                msg)
6834
6835     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
6836       self._EnsureSecondary(source_node)
6837       self._WaitUntilSync()
6838       self._GoStandalone()
6839       self._GoReconnect(False)
6840       self._WaitUntilSync()
6841
6842     self.feedback_fn("* done")
6843
6844   def _ExecFailover(self):
6845     """Failover an instance.
6846
6847     The failover is done by shutting it down on its present node and
6848     starting it on the secondary.
6849
6850     """
6851     instance = self.instance
6852     primary_node = self.cfg.GetNodeInfo(instance.primary_node)
6853
6854     source_node = instance.primary_node
6855     target_node = self.target_node
6856
6857     if instance.admin_up:
6858       self.feedback_fn("* checking disk consistency between source and target")
6859       for dev in instance.disks:
6860         # for drbd, these are drbd over lvm
6861         if not _CheckDiskConsistency(self, dev, target_node, False):
6862           if not self.ignore_consistency:
6863             raise errors.OpExecError("Disk %s is degraded on target node,"
6864                                      " aborting failover" % dev.iv_name)
6865     else:
6866       self.feedback_fn("* not checking disk consistency as instance is not"
6867                        " running")
6868
6869     self.feedback_fn("* shutting down instance on source node")
6870     logging.info("Shutting down instance %s on node %s",
6871                  instance.name, source_node)
6872
6873     result = self.rpc.call_instance_shutdown(source_node, instance,
6874                                              self.shutdown_timeout)
6875     msg = result.fail_msg
6876     if msg:
6877       if self.ignore_consistency or primary_node.offline:
6878         self.lu.LogWarning("Could not shutdown instance %s on node %s,"
6879                            " proceeding anyway; please make sure node"
6880                            " %s is down; error details: %s",
6881                            instance.name, source_node, source_node, msg)
6882       else:
6883         raise errors.OpExecError("Could not shutdown instance %s on"
6884                                  " node %s: %s" %
6885                                  (instance.name, source_node, msg))
6886
6887     self.feedback_fn("* deactivating the instance's disks on source node")
6888     if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
6889       raise errors.OpExecError("Can't shut down the instance's disks.")
6890
6891     instance.primary_node = target_node
6892     # distribute new instance config to the other nodes
6893     self.cfg.Update(instance, self.feedback_fn)
6894
6895     # Only start the instance if it's marked as up
6896     if instance.admin_up:
6897       self.feedback_fn("* activating the instance's disks on target node")
6898       logging.info("Starting instance %s on node %s",
6899                    instance.name, target_node)
6900
6901       disks_ok, _ = _AssembleInstanceDisks(self, instance,
6902                                            ignore_secondaries=True)
6903       if not disks_ok:
6904         _ShutdownInstanceDisks(self, instance)
6905         raise errors.OpExecError("Can't activate the instance's disks")
6906
6907       self.feedback_fn("* starting the instance on the target node")
6908       result = self.rpc.call_instance_start(target_node, instance, None, None)
6909       msg = result.fail_msg
6910       if msg:
6911         _ShutdownInstanceDisks(self, instance)
6912         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6913                                  (instance.name, target_node, msg))
6914
6915   def Exec(self, feedback_fn):
6916     """Perform the migration.
6917
6918     """
6919     self.feedback_fn = feedback_fn
6920     self.source_node = self.instance.primary_node
6921
6922     # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
6923     if self.instance.disk_template in constants.DTS_INT_MIRROR:
6924       self.target_node = self.instance.secondary_nodes[0]
6925       # Otherwise self.target_node has been populated either
6926       # directly, or through an iallocator.
6927
6928     self.all_nodes = [self.source_node, self.target_node]
6929     self.nodes_ip = {
6930       self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6931       self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6932       }
6933
6934     if self.failover:
6935       feedback_fn("Failover instance %s" % self.instance.name)
6936       self._ExecFailover()
6937     else:
6938       feedback_fn("Migrating instance %s" % self.instance.name)
6939
6940       if self.cleanup:
6941         return self._ExecCleanup()
6942       else:
6943         return self._ExecMigration()
6944
6945
6946 def _CreateBlockDev(lu, node, instance, device, force_create,
6947                     info, force_open):
6948   """Create a tree of block devices on a given node.
6949
6950   If this device type has to be created on secondaries, create it and
6951   all its children.
6952
6953   If not, just recurse to children keeping the same 'force' value.
6954
6955   @param lu: the lu on whose behalf we execute
6956   @param node: the node on which to create the device
6957   @type instance: L{objects.Instance}
6958   @param instance: the instance which owns the device
6959   @type device: L{objects.Disk}
6960   @param device: the device to create
6961   @type force_create: boolean
6962   @param force_create: whether to force creation of this device; this
6963       will be change to True whenever we find a device which has
6964       CreateOnSecondary() attribute
6965   @param info: the extra 'metadata' we should attach to the device
6966       (this will be represented as a LVM tag)
6967   @type force_open: boolean
6968   @param force_open: this parameter will be passes to the
6969       L{backend.BlockdevCreate} function where it specifies
6970       whether we run on primary or not, and it affects both
6971       the child assembly and the device own Open() execution
6972
6973   """
6974   if device.CreateOnSecondary():
6975     force_create = True
6976
6977   if device.children:
6978     for child in device.children:
6979       _CreateBlockDev(lu, node, instance, child, force_create,
6980                       info, force_open)
6981
6982   if not force_create:
6983     return
6984
6985   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6986
6987
6988 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6989   """Create a single block device on a given node.
6990
6991   This will not recurse over children of the device, so they must be
6992   created in advance.
6993
6994   @param lu: the lu on whose behalf we execute
6995   @param node: the node on which to create the device
6996   @type instance: L{objects.Instance}
6997   @param instance: the instance which owns the device
6998   @type device: L{objects.Disk}
6999   @param device: the device to create
7000   @param info: the extra 'metadata' we should attach to the device
7001       (this will be represented as a LVM tag)
7002   @type force_open: boolean
7003   @param force_open: this parameter will be passes to the
7004       L{backend.BlockdevCreate} function where it specifies
7005       whether we run on primary or not, and it affects both
7006       the child assembly and the device own Open() execution
7007
7008   """
7009   lu.cfg.SetDiskID(device, node)
7010   result = lu.rpc.call_blockdev_create(node, device, device.size,
7011                                        instance.name, force_open, info)
7012   result.Raise("Can't create block device %s on"
7013                " node %s for instance %s" % (device, node, instance.name))
7014   if device.physical_id is None:
7015     device.physical_id = result.payload
7016
7017
7018 def _GenerateUniqueNames(lu, exts):
7019   """Generate a suitable LV name.
7020
7021   This will generate a logical volume name for the given instance.
7022
7023   """
7024   results = []
7025   for val in exts:
7026     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7027     results.append("%s%s" % (new_id, val))
7028   return results
7029
7030
7031 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7032                          iv_name, p_minor, s_minor):
7033   """Generate a drbd8 device complete with its children.
7034
7035   """
7036   assert len(vgnames) == len(names) == 2
7037   port = lu.cfg.AllocatePort()
7038   shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7039   dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7040                           logical_id=(vgnames[0], names[0]))
7041   dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7042                           logical_id=(vgnames[1], names[1]))
7043   drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7044                           logical_id=(primary, secondary, port,
7045                                       p_minor, s_minor,
7046                                       shared_secret),
7047                           children=[dev_data, dev_meta],
7048                           iv_name=iv_name)
7049   return drbd_dev
7050
7051
7052 def _GenerateDiskTemplate(lu, template_name,
7053                           instance_name, primary_node,
7054                           secondary_nodes, disk_info,
7055                           file_storage_dir, file_driver,
7056                           base_index, feedback_fn):
7057   """Generate the entire disk layout for a given template type.
7058
7059   """
7060   #TODO: compute space requirements
7061
7062   vgname = lu.cfg.GetVGName()
7063   disk_count = len(disk_info)
7064   disks = []
7065   if template_name == constants.DT_DISKLESS:
7066     pass
7067   elif template_name == constants.DT_PLAIN:
7068     if len(secondary_nodes) != 0:
7069       raise errors.ProgrammerError("Wrong template configuration")
7070
7071     names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7072                                       for i in range(disk_count)])
7073     for idx, disk in enumerate(disk_info):
7074       disk_index = idx + base_index
7075       vg = disk.get(constants.IDISK_VG, vgname)
7076       feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7077       disk_dev = objects.Disk(dev_type=constants.LD_LV,
7078                               size=disk[constants.IDISK_SIZE],
7079                               logical_id=(vg, names[idx]),
7080                               iv_name="disk/%d" % disk_index,
7081                               mode=disk[constants.IDISK_MODE])
7082       disks.append(disk_dev)
7083   elif template_name == constants.DT_DRBD8:
7084     if len(secondary_nodes) != 1:
7085       raise errors.ProgrammerError("Wrong template configuration")
7086     remote_node = secondary_nodes[0]
7087     minors = lu.cfg.AllocateDRBDMinor(
7088       [primary_node, remote_node] * len(disk_info), instance_name)
7089
7090     names = []
7091     for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7092                                                for i in range(disk_count)]):
7093       names.append(lv_prefix + "_data")
7094       names.append(lv_prefix + "_meta")
7095     for idx, disk in enumerate(disk_info):
7096       disk_index = idx + base_index
7097       data_vg = disk.get(constants.IDISK_VG, vgname)
7098       meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7099       disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7100                                       disk[constants.IDISK_SIZE],
7101                                       [data_vg, meta_vg],
7102                                       names[idx * 2:idx * 2 + 2],
7103                                       "disk/%d" % disk_index,
7104                                       minors[idx * 2], minors[idx * 2 + 1])
7105       disk_dev.mode = disk[constants.IDISK_MODE]
7106       disks.append(disk_dev)
7107   elif template_name == constants.DT_FILE:
7108     if len(secondary_nodes) != 0:
7109       raise errors.ProgrammerError("Wrong template configuration")
7110
7111     opcodes.RequireFileStorage()
7112
7113     for idx, disk in enumerate(disk_info):
7114       disk_index = idx + base_index
7115       disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7116                               size=disk[constants.IDISK_SIZE],
7117                               iv_name="disk/%d" % disk_index,
7118                               logical_id=(file_driver,
7119                                           "%s/disk%d" % (file_storage_dir,
7120                                                          disk_index)),
7121                               mode=disk[constants.IDISK_MODE])
7122       disks.append(disk_dev)
7123   elif template_name == constants.DT_SHARED_FILE:
7124     if len(secondary_nodes) != 0:
7125       raise errors.ProgrammerError("Wrong template configuration")
7126
7127     opcodes.RequireSharedFileStorage()
7128
7129     for idx, disk in enumerate(disk_info):
7130       disk_index = idx + base_index
7131       disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7132                               size=disk[constants.IDISK_SIZE],
7133                               iv_name="disk/%d" % disk_index,
7134                               logical_id=(file_driver,
7135                                           "%s/disk%d" % (file_storage_dir,
7136                                                          disk_index)),
7137                               mode=disk[constants.IDISK_MODE])
7138       disks.append(disk_dev)
7139   elif template_name == constants.DT_BLOCK:
7140     if len(secondary_nodes) != 0:
7141       raise errors.ProgrammerError("Wrong template configuration")
7142
7143     for idx, disk in enumerate(disk_info):
7144       disk_index = idx + base_index
7145       disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7146                               size=disk[constants.IDISK_SIZE],
7147                               logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7148                                           disk[constants.IDISK_ADOPT]),
7149                               iv_name="disk/%d" % disk_index,
7150                               mode=disk[constants.IDISK_MODE])
7151       disks.append(disk_dev)
7152
7153   else:
7154     raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7155   return disks
7156
7157
7158 def _GetInstanceInfoText(instance):
7159   """Compute that text that should be added to the disk's metadata.
7160
7161   """
7162   return "originstname+%s" % instance.name
7163
7164
7165 def _CalcEta(time_taken, written, total_size):
7166   """Calculates the ETA based on size written and total size.
7167
7168   @param time_taken: The time taken so far
7169   @param written: amount written so far
7170   @param total_size: The total size of data to be written
7171   @return: The remaining time in seconds
7172
7173   """
7174   avg_time = time_taken / float(written)
7175   return (total_size - written) * avg_time
7176
7177
7178 def _WipeDisks(lu, instance):
7179   """Wipes instance disks.
7180
7181   @type lu: L{LogicalUnit}
7182   @param lu: the logical unit on whose behalf we execute
7183   @type instance: L{objects.Instance}
7184   @param instance: the instance whose disks we should create
7185   @return: the success of the wipe
7186
7187   """
7188   node = instance.primary_node
7189
7190   for device in instance.disks:
7191     lu.cfg.SetDiskID(device, node)
7192
7193   logging.info("Pause sync of instance %s disks", instance.name)
7194   result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7195
7196   for idx, success in enumerate(result.payload):
7197     if not success:
7198       logging.warn("pause-sync of instance %s for disks %d failed",
7199                    instance.name, idx)
7200
7201   try:
7202     for idx, device in enumerate(instance.disks):
7203       # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7204       # MAX_WIPE_CHUNK at max
7205       wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7206                             constants.MIN_WIPE_CHUNK_PERCENT)
7207       # we _must_ make this an int, otherwise rounding errors will
7208       # occur
7209       wipe_chunk_size = int(wipe_chunk_size)
7210
7211       lu.LogInfo("* Wiping disk %d", idx)
7212       logging.info("Wiping disk %d for instance %s, node %s using"
7213                    " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7214
7215       offset = 0
7216       size = device.size
7217       last_output = 0
7218       start_time = time.time()
7219
7220       while offset < size:
7221         wipe_size = min(wipe_chunk_size, size - offset)
7222         logging.debug("Wiping disk %d, offset %s, chunk %s",
7223                       idx, offset, wipe_size)
7224         result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7225         result.Raise("Could not wipe disk %d at offset %d for size %d" %
7226                      (idx, offset, wipe_size))
7227         now = time.time()
7228         offset += wipe_size
7229         if now - last_output >= 60:
7230           eta = _CalcEta(now - start_time, offset, size)
7231           lu.LogInfo(" - done: %.1f%% ETA: %s" %
7232                      (offset / float(size) * 100, utils.FormatSeconds(eta)))
7233           last_output = now
7234   finally:
7235     logging.info("Resume sync of instance %s disks", instance.name)
7236
7237     result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7238
7239     for idx, success in enumerate(result.payload):
7240       if not success:
7241         lu.LogWarning("Resume sync of disk %d failed, please have a"
7242                       " look at the status and troubleshoot the issue", idx)
7243         logging.warn("resume-sync of instance %s for disks %d failed",
7244                      instance.name, idx)
7245
7246
7247 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7248   """Create all disks for an instance.
7249
7250   This abstracts away some work from AddInstance.
7251
7252   @type lu: L{LogicalUnit}
7253   @param lu: the logical unit on whose behalf we execute
7254   @type instance: L{objects.Instance}
7255   @param instance: the instance whose disks we should create
7256   @type to_skip: list
7257   @param to_skip: list of indices to skip
7258   @type target_node: string
7259   @param target_node: if passed, overrides the target node for creation
7260   @rtype: boolean
7261   @return: the success of the creation
7262
7263   """
7264   info = _GetInstanceInfoText(instance)
7265   if target_node is None:
7266     pnode = instance.primary_node
7267     all_nodes = instance.all_nodes
7268   else:
7269     pnode = target_node
7270     all_nodes = [pnode]
7271
7272   if instance.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE):
7273     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7274     result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7275
7276     result.Raise("Failed to create directory '%s' on"
7277                  " node %s" % (file_storage_dir, pnode))
7278
7279   # Note: this needs to be kept in sync with adding of disks in
7280   # LUInstanceSetParams
7281   for idx, device in enumerate(instance.disks):
7282     if to_skip and idx in to_skip:
7283       continue
7284     logging.info("Creating volume %s for instance %s",
7285                  device.iv_name, instance.name)
7286     #HARDCODE
7287     for node in all_nodes:
7288       f_create = node == pnode
7289       _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7290
7291
7292 def _RemoveDisks(lu, instance, target_node=None):
7293   """Remove all disks for an instance.
7294
7295   This abstracts away some work from `AddInstance()` and
7296   `RemoveInstance()`. Note that in case some of the devices couldn't
7297   be removed, the removal will continue with the other ones (compare
7298   with `_CreateDisks()`).
7299
7300   @type lu: L{LogicalUnit}
7301   @param lu: the logical unit on whose behalf we execute
7302   @type instance: L{objects.Instance}
7303   @param instance: the instance whose disks we should remove
7304   @type target_node: string
7305   @param target_node: used to override the node on which to remove the disks
7306   @rtype: boolean
7307   @return: the success of the removal
7308
7309   """
7310   logging.info("Removing block devices for instance %s", instance.name)
7311
7312   all_result = True
7313   for device in instance.disks:
7314     if target_node:
7315       edata = [(target_node, device)]
7316     else:
7317       edata = device.ComputeNodeTree(instance.primary_node)
7318     for node, disk in edata:
7319       lu.cfg.SetDiskID(disk, node)
7320       msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7321       if msg:
7322         lu.LogWarning("Could not remove block device %s on node %s,"
7323                       " continuing anyway: %s", device.iv_name, node, msg)
7324         all_result = False
7325
7326   if instance.disk_template == constants.DT_FILE:
7327     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7328     if target_node:
7329       tgt = target_node
7330     else:
7331       tgt = instance.primary_node
7332     result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7333     if result.fail_msg:
7334       lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7335                     file_storage_dir, instance.primary_node, result.fail_msg)
7336       all_result = False
7337
7338   return all_result
7339
7340
7341 def _ComputeDiskSizePerVG(disk_template, disks):
7342   """Compute disk size requirements in the volume group
7343
7344   """
7345   def _compute(disks, payload):
7346     """Universal algorithm.
7347
7348     """
7349     vgs = {}
7350     for disk in disks:
7351       vgs[disk[constants.IDISK_VG]] = \
7352         vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
7353
7354     return vgs
7355
7356   # Required free disk space as a function of disk and swap space
7357   req_size_dict = {
7358     constants.DT_DISKLESS: {},
7359     constants.DT_PLAIN: _compute(disks, 0),
7360     # 128 MB are added for drbd metadata for each disk
7361     constants.DT_DRBD8: _compute(disks, 128),
7362     constants.DT_FILE: {},
7363     constants.DT_SHARED_FILE: {},
7364   }
7365
7366   if disk_template not in req_size_dict:
7367     raise errors.ProgrammerError("Disk template '%s' size requirement"
7368                                  " is unknown" %  disk_template)
7369
7370   return req_size_dict[disk_template]
7371
7372
7373 def _ComputeDiskSize(disk_template, disks):
7374   """Compute disk size requirements in the volume group
7375
7376   """
7377   # Required free disk space as a function of disk and swap space
7378   req_size_dict = {
7379     constants.DT_DISKLESS: None,
7380     constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
7381     # 128 MB are added for drbd metadata for each disk
7382     constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
7383     constants.DT_FILE: None,
7384     constants.DT_SHARED_FILE: 0,
7385     constants.DT_BLOCK: 0,
7386   }
7387
7388   if disk_template not in req_size_dict:
7389     raise errors.ProgrammerError("Disk template '%s' size requirement"
7390                                  " is unknown" %  disk_template)
7391
7392   return req_size_dict[disk_template]
7393
7394
7395 def _FilterVmNodes(lu, nodenames):
7396   """Filters out non-vm_capable nodes from a list.
7397
7398   @type lu: L{LogicalUnit}
7399   @param lu: the logical unit for which we check
7400   @type nodenames: list
7401   @param nodenames: the list of nodes on which we should check
7402   @rtype: list
7403   @return: the list of vm-capable nodes
7404
7405   """
7406   vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
7407   return [name for name in nodenames if name not in vm_nodes]
7408
7409
7410 def _CheckHVParams(lu, nodenames, hvname, hvparams):
7411   """Hypervisor parameter validation.
7412
7413   This function abstract the hypervisor parameter validation to be
7414   used in both instance create and instance modify.
7415
7416   @type lu: L{LogicalUnit}
7417   @param lu: the logical unit for which we check
7418   @type nodenames: list
7419   @param nodenames: the list of nodes on which we should check
7420   @type hvname: string
7421   @param hvname: the name of the hypervisor we should use
7422   @type hvparams: dict
7423   @param hvparams: the parameters which we need to check
7424   @raise errors.OpPrereqError: if the parameters are not valid
7425
7426   """
7427   nodenames = _FilterVmNodes(lu, nodenames)
7428   hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
7429                                                   hvname,
7430                                                   hvparams)
7431   for node in nodenames:
7432     info = hvinfo[node]
7433     if info.offline:
7434       continue
7435     info.Raise("Hypervisor parameter validation failed on node %s" % node)
7436
7437
7438 def _CheckOSParams(lu, required, nodenames, osname, osparams):
7439   """OS parameters validation.
7440
7441   @type lu: L{LogicalUnit}
7442   @param lu: the logical unit for which we check
7443   @type required: boolean
7444   @param required: whether the validation should fail if the OS is not
7445       found
7446   @type nodenames: list
7447   @param nodenames: the list of nodes on which we should check
7448   @type osname: string
7449   @param osname: the name of the hypervisor we should use
7450   @type osparams: dict
7451   @param osparams: the parameters which we need to check
7452   @raise errors.OpPrereqError: if the parameters are not valid
7453
7454   """
7455   nodenames = _FilterVmNodes(lu, nodenames)
7456   result = lu.rpc.call_os_validate(required, nodenames, osname,
7457                                    [constants.OS_VALIDATE_PARAMETERS],
7458                                    osparams)
7459   for node, nres in result.items():
7460     # we don't check for offline cases since this should be run only
7461     # against the master node and/or an instance's nodes
7462     nres.Raise("OS Parameters validation failed on node %s" % node)
7463     if not nres.payload:
7464       lu.LogInfo("OS %s not found on node %s, validation skipped",
7465                  osname, node)
7466
7467
7468 class LUInstanceCreate(LogicalUnit):
7469   """Create an instance.
7470
7471   """
7472   HPATH = "instance-add"
7473   HTYPE = constants.HTYPE_INSTANCE
7474   REQ_BGL = False
7475
7476   def CheckArguments(self):
7477     """Check arguments.
7478
7479     """
7480     # do not require name_check to ease forward/backward compatibility
7481     # for tools
7482     if self.op.no_install and self.op.start:
7483       self.LogInfo("No-installation mode selected, disabling startup")
7484       self.op.start = False
7485     # validate/normalize the instance name
7486     self.op.instance_name = \
7487       netutils.Hostname.GetNormalizedName(self.op.instance_name)
7488
7489     if self.op.ip_check and not self.op.name_check:
7490       # TODO: make the ip check more flexible and not depend on the name check
7491       raise errors.OpPrereqError("Cannot do IP address check without a name"
7492                                  " check", errors.ECODE_INVAL)
7493
7494     # check nics' parameter names
7495     for nic in self.op.nics:
7496       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
7497
7498     # check disks. parameter names and consistent adopt/no-adopt strategy
7499     has_adopt = has_no_adopt = False
7500     for disk in self.op.disks:
7501       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
7502       if constants.IDISK_ADOPT in disk:
7503         has_adopt = True
7504       else:
7505         has_no_adopt = True
7506     if has_adopt and has_no_adopt:
7507       raise errors.OpPrereqError("Either all disks are adopted or none is",
7508                                  errors.ECODE_INVAL)
7509     if has_adopt:
7510       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
7511         raise errors.OpPrereqError("Disk adoption is not supported for the"
7512                                    " '%s' disk template" %
7513                                    self.op.disk_template,
7514                                    errors.ECODE_INVAL)
7515       if self.op.iallocator is not None:
7516         raise errors.OpPrereqError("Disk adoption not allowed with an"
7517                                    " iallocator script", errors.ECODE_INVAL)
7518       if self.op.mode == constants.INSTANCE_IMPORT:
7519         raise errors.OpPrereqError("Disk adoption not allowed for"
7520                                    " instance import", errors.ECODE_INVAL)
7521     else:
7522       if self.op.disk_template in constants.DTS_MUST_ADOPT:
7523         raise errors.OpPrereqError("Disk template %s requires disk adoption,"
7524                                    " but no 'adopt' parameter given" %
7525                                    self.op.disk_template,
7526                                    errors.ECODE_INVAL)
7527
7528     self.adopt_disks = has_adopt
7529
7530     # instance name verification
7531     if self.op.name_check:
7532       self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
7533       self.op.instance_name = self.hostname1.name
7534       # used in CheckPrereq for ip ping check
7535       self.check_ip = self.hostname1.ip
7536     else:
7537       self.check_ip = None
7538
7539     # file storage checks
7540     if (self.op.file_driver and
7541         not self.op.file_driver in constants.FILE_DRIVER):
7542       raise errors.OpPrereqError("Invalid file driver name '%s'" %
7543                                  self.op.file_driver, errors.ECODE_INVAL)
7544
7545     if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
7546       raise errors.OpPrereqError("File storage directory path not absolute",
7547                                  errors.ECODE_INVAL)
7548
7549     ### Node/iallocator related checks
7550     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
7551
7552     if self.op.pnode is not None:
7553       if self.op.disk_template in constants.DTS_INT_MIRROR:
7554         if self.op.snode is None:
7555           raise errors.OpPrereqError("The networked disk templates need"
7556                                      " a mirror node", errors.ECODE_INVAL)
7557       elif self.op.snode:
7558         self.LogWarning("Secondary node will be ignored on non-mirrored disk"
7559                         " template")
7560         self.op.snode = None
7561
7562     self._cds = _GetClusterDomainSecret()
7563
7564     if self.op.mode == constants.INSTANCE_IMPORT:
7565       # On import force_variant must be True, because if we forced it at
7566       # initial install, our only chance when importing it back is that it
7567       # works again!
7568       self.op.force_variant = True
7569
7570       if self.op.no_install:
7571         self.LogInfo("No-installation mode has no effect during import")
7572
7573     elif self.op.mode == constants.INSTANCE_CREATE:
7574       if self.op.os_type is None:
7575         raise errors.OpPrereqError("No guest OS specified",
7576                                    errors.ECODE_INVAL)
7577       if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7578         raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7579                                    " installation" % self.op.os_type,
7580                                    errors.ECODE_STATE)
7581       if self.op.disk_template is None:
7582         raise errors.OpPrereqError("No disk template specified",
7583                                    errors.ECODE_INVAL)
7584
7585     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7586       # Check handshake to ensure both clusters have the same domain secret
7587       src_handshake = self.op.source_handshake
7588       if not src_handshake:
7589         raise errors.OpPrereqError("Missing source handshake",
7590                                    errors.ECODE_INVAL)
7591
7592       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7593                                                            src_handshake)
7594       if errmsg:
7595         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7596                                    errors.ECODE_INVAL)
7597
7598       # Load and check source CA
7599       self.source_x509_ca_pem = self.op.source_x509_ca
7600       if not self.source_x509_ca_pem:
7601         raise errors.OpPrereqError("Missing source X509 CA",
7602                                    errors.ECODE_INVAL)
7603
7604       try:
7605         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7606                                                     self._cds)
7607       except OpenSSL.crypto.Error, err:
7608         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7609                                    (err, ), errors.ECODE_INVAL)
7610
7611       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7612       if errcode is not None:
7613         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7614                                    errors.ECODE_INVAL)
7615
7616       self.source_x509_ca = cert
7617
7618       src_instance_name = self.op.source_instance_name
7619       if not src_instance_name:
7620         raise errors.OpPrereqError("Missing source instance name",
7621                                    errors.ECODE_INVAL)
7622
7623       self.source_instance_name = \
7624           netutils.GetHostname(name=src_instance_name).name
7625
7626     else:
7627       raise errors.OpPrereqError("Invalid instance creation mode %r" %
7628                                  self.op.mode, errors.ECODE_INVAL)
7629
7630   def ExpandNames(self):
7631     """ExpandNames for CreateInstance.
7632
7633     Figure out the right locks for instance creation.
7634
7635     """
7636     self.needed_locks = {}
7637
7638     instance_name = self.op.instance_name
7639     # this is just a preventive check, but someone might still add this
7640     # instance in the meantime, and creation will fail at lock-add time
7641     if instance_name in self.cfg.GetInstanceList():
7642       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7643                                  instance_name, errors.ECODE_EXISTS)
7644
7645     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7646
7647     if self.op.iallocator:
7648       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7649     else:
7650       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7651       nodelist = [self.op.pnode]
7652       if self.op.snode is not None:
7653         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7654         nodelist.append(self.op.snode)
7655       self.needed_locks[locking.LEVEL_NODE] = nodelist
7656
7657     # in case of import lock the source node too
7658     if self.op.mode == constants.INSTANCE_IMPORT:
7659       src_node = self.op.src_node
7660       src_path = self.op.src_path
7661
7662       if src_path is None:
7663         self.op.src_path = src_path = self.op.instance_name
7664
7665       if src_node is None:
7666         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7667         self.op.src_node = None
7668         if os.path.isabs(src_path):
7669           raise errors.OpPrereqError("Importing an instance from an absolute"
7670                                      " path requires a source node option",
7671                                      errors.ECODE_INVAL)
7672       else:
7673         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7674         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7675           self.needed_locks[locking.LEVEL_NODE].append(src_node)
7676         if not os.path.isabs(src_path):
7677           self.op.src_path = src_path = \
7678             utils.PathJoin(constants.EXPORT_DIR, src_path)
7679
7680   def _RunAllocator(self):
7681     """Run the allocator based on input opcode.
7682
7683     """
7684     nics = [n.ToDict() for n in self.nics]
7685     ial = IAllocator(self.cfg, self.rpc,
7686                      mode=constants.IALLOCATOR_MODE_ALLOC,
7687                      name=self.op.instance_name,
7688                      disk_template=self.op.disk_template,
7689                      tags=[],
7690                      os=self.op.os_type,
7691                      vcpus=self.be_full[constants.BE_VCPUS],
7692                      mem_size=self.be_full[constants.BE_MEMORY],
7693                      disks=self.disks,
7694                      nics=nics,
7695                      hypervisor=self.op.hypervisor,
7696                      )
7697
7698     ial.Run(self.op.iallocator)
7699
7700     if not ial.success:
7701       raise errors.OpPrereqError("Can't compute nodes using"
7702                                  " iallocator '%s': %s" %
7703                                  (self.op.iallocator, ial.info),
7704                                  errors.ECODE_NORES)
7705     if len(ial.result) != ial.required_nodes:
7706       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7707                                  " of nodes (%s), required %s" %
7708                                  (self.op.iallocator, len(ial.result),
7709                                   ial.required_nodes), errors.ECODE_FAULT)
7710     self.op.pnode = ial.result[0]
7711     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7712                  self.op.instance_name, self.op.iallocator,
7713                  utils.CommaJoin(ial.result))
7714     if ial.required_nodes == 2:
7715       self.op.snode = ial.result[1]
7716
7717   def BuildHooksEnv(self):
7718     """Build hooks env.
7719
7720     This runs on master, primary and secondary nodes of the instance.
7721
7722     """
7723     env = {
7724       "ADD_MODE": self.op.mode,
7725       }
7726     if self.op.mode == constants.INSTANCE_IMPORT:
7727       env["SRC_NODE"] = self.op.src_node
7728       env["SRC_PATH"] = self.op.src_path
7729       env["SRC_IMAGES"] = self.src_images
7730
7731     env.update(_BuildInstanceHookEnv(
7732       name=self.op.instance_name,
7733       primary_node=self.op.pnode,
7734       secondary_nodes=self.secondaries,
7735       status=self.op.start,
7736       os_type=self.op.os_type,
7737       memory=self.be_full[constants.BE_MEMORY],
7738       vcpus=self.be_full[constants.BE_VCPUS],
7739       nics=_NICListToTuple(self, self.nics),
7740       disk_template=self.op.disk_template,
7741       disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
7742              for d in self.disks],
7743       bep=self.be_full,
7744       hvp=self.hv_full,
7745       hypervisor_name=self.op.hypervisor,
7746     ))
7747
7748     return env
7749
7750   def BuildHooksNodes(self):
7751     """Build hooks nodes.
7752
7753     """
7754     nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
7755     return nl, nl
7756
7757   def _ReadExportInfo(self):
7758     """Reads the export information from disk.
7759
7760     It will override the opcode source node and path with the actual
7761     information, if these two were not specified before.
7762
7763     @return: the export information
7764
7765     """
7766     assert self.op.mode == constants.INSTANCE_IMPORT
7767
7768     src_node = self.op.src_node
7769     src_path = self.op.src_path
7770
7771     if src_node is None:
7772       locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
7773       exp_list = self.rpc.call_export_list(locked_nodes)
7774       found = False
7775       for node in exp_list:
7776         if exp_list[node].fail_msg:
7777           continue
7778         if src_path in exp_list[node].payload:
7779           found = True
7780           self.op.src_node = src_node = node
7781           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7782                                                        src_path)
7783           break
7784       if not found:
7785         raise errors.OpPrereqError("No export found for relative path %s" %
7786                                     src_path, errors.ECODE_INVAL)
7787
7788     _CheckNodeOnline(self, src_node)
7789     result = self.rpc.call_export_info(src_node, src_path)
7790     result.Raise("No export or invalid export found in dir %s" % src_path)
7791
7792     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7793     if not export_info.has_section(constants.INISECT_EXP):
7794       raise errors.ProgrammerError("Corrupted export config",
7795                                    errors.ECODE_ENVIRON)
7796
7797     ei_version = export_info.get(constants.INISECT_EXP, "version")
7798     if (int(ei_version) != constants.EXPORT_VERSION):
7799       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7800                                  (ei_version, constants.EXPORT_VERSION),
7801                                  errors.ECODE_ENVIRON)
7802     return export_info
7803
7804   def _ReadExportParams(self, einfo):
7805     """Use export parameters as defaults.
7806
7807     In case the opcode doesn't specify (as in override) some instance
7808     parameters, then try to use them from the export information, if
7809     that declares them.
7810
7811     """
7812     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7813
7814     if self.op.disk_template is None:
7815       if einfo.has_option(constants.INISECT_INS, "disk_template"):
7816         self.op.disk_template = einfo.get(constants.INISECT_INS,
7817                                           "disk_template")
7818       else:
7819         raise errors.OpPrereqError("No disk template specified and the export"
7820                                    " is missing the disk_template information",
7821                                    errors.ECODE_INVAL)
7822
7823     if not self.op.disks:
7824       if einfo.has_option(constants.INISECT_INS, "disk_count"):
7825         disks = []
7826         # TODO: import the disk iv_name too
7827         for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7828           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7829           disks.append({constants.IDISK_SIZE: disk_sz})
7830         self.op.disks = disks
7831       else:
7832         raise errors.OpPrereqError("No disk info specified and the export"
7833                                    " is missing the disk information",
7834                                    errors.ECODE_INVAL)
7835
7836     if (not self.op.nics and
7837         einfo.has_option(constants.INISECT_INS, "nic_count")):
7838       nics = []
7839       for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7840         ndict = {}
7841         for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7842           v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7843           ndict[name] = v
7844         nics.append(ndict)
7845       self.op.nics = nics
7846
7847     if (self.op.hypervisor is None and
7848         einfo.has_option(constants.INISECT_INS, "hypervisor")):
7849       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7850     if einfo.has_section(constants.INISECT_HYP):
7851       # use the export parameters but do not override the ones
7852       # specified by the user
7853       for name, value in einfo.items(constants.INISECT_HYP):
7854         if name not in self.op.hvparams:
7855           self.op.hvparams[name] = value
7856
7857     if einfo.has_section(constants.INISECT_BEP):
7858       # use the parameters, without overriding
7859       for name, value in einfo.items(constants.INISECT_BEP):
7860         if name not in self.op.beparams:
7861           self.op.beparams[name] = value
7862     else:
7863       # try to read the parameters old style, from the main section
7864       for name in constants.BES_PARAMETERS:
7865         if (name not in self.op.beparams and
7866             einfo.has_option(constants.INISECT_INS, name)):
7867           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7868
7869     if einfo.has_section(constants.INISECT_OSP):
7870       # use the parameters, without overriding
7871       for name, value in einfo.items(constants.INISECT_OSP):
7872         if name not in self.op.osparams:
7873           self.op.osparams[name] = value
7874
7875   def _RevertToDefaults(self, cluster):
7876     """Revert the instance parameters to the default values.
7877
7878     """
7879     # hvparams
7880     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7881     for name in self.op.hvparams.keys():
7882       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7883         del self.op.hvparams[name]
7884     # beparams
7885     be_defs = cluster.SimpleFillBE({})
7886     for name in self.op.beparams.keys():
7887       if name in be_defs and be_defs[name] == self.op.beparams[name]:
7888         del self.op.beparams[name]
7889     # nic params
7890     nic_defs = cluster.SimpleFillNIC({})
7891     for nic in self.op.nics:
7892       for name in constants.NICS_PARAMETERS:
7893         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7894           del nic[name]
7895     # osparams
7896     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7897     for name in self.op.osparams.keys():
7898       if name in os_defs and os_defs[name] == self.op.osparams[name]:
7899         del self.op.osparams[name]
7900
7901   def CheckPrereq(self):
7902     """Check prerequisites.
7903
7904     """
7905     if self.op.mode == constants.INSTANCE_IMPORT:
7906       export_info = self._ReadExportInfo()
7907       self._ReadExportParams(export_info)
7908
7909     if (not self.cfg.GetVGName() and
7910         self.op.disk_template not in constants.DTS_NOT_LVM):
7911       raise errors.OpPrereqError("Cluster does not support lvm-based"
7912                                  " instances", errors.ECODE_STATE)
7913
7914     if self.op.hypervisor is None:
7915       self.op.hypervisor = self.cfg.GetHypervisorType()
7916
7917     cluster = self.cfg.GetClusterInfo()
7918     enabled_hvs = cluster.enabled_hypervisors
7919     if self.op.hypervisor not in enabled_hvs:
7920       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7921                                  " cluster (%s)" % (self.op.hypervisor,
7922                                   ",".join(enabled_hvs)),
7923                                  errors.ECODE_STATE)
7924
7925     # check hypervisor parameter syntax (locally)
7926     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7927     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7928                                       self.op.hvparams)
7929     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7930     hv_type.CheckParameterSyntax(filled_hvp)
7931     self.hv_full = filled_hvp
7932     # check that we don't specify global parameters on an instance
7933     _CheckGlobalHvParams(self.op.hvparams)
7934
7935     # fill and remember the beparams dict
7936     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7937     self.be_full = cluster.SimpleFillBE(self.op.beparams)
7938
7939     # build os parameters
7940     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7941
7942     # now that hvp/bep are in final format, let's reset to defaults,
7943     # if told to do so
7944     if self.op.identify_defaults:
7945       self._RevertToDefaults(cluster)
7946
7947     # NIC buildup
7948     self.nics = []
7949     for idx, nic in enumerate(self.op.nics):
7950       nic_mode_req = nic.get(constants.INIC_MODE, None)
7951       nic_mode = nic_mode_req
7952       if nic_mode is None:
7953         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7954
7955       # in routed mode, for the first nic, the default ip is 'auto'
7956       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7957         default_ip_mode = constants.VALUE_AUTO
7958       else:
7959         default_ip_mode = constants.VALUE_NONE
7960
7961       # ip validity checks
7962       ip = nic.get(constants.INIC_IP, default_ip_mode)
7963       if ip is None or ip.lower() == constants.VALUE_NONE:
7964         nic_ip = None
7965       elif ip.lower() == constants.VALUE_AUTO:
7966         if not self.op.name_check:
7967           raise errors.OpPrereqError("IP address set to auto but name checks"
7968                                      " have been skipped",
7969                                      errors.ECODE_INVAL)
7970         nic_ip = self.hostname1.ip
7971       else:
7972         if not netutils.IPAddress.IsValid(ip):
7973           raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7974                                      errors.ECODE_INVAL)
7975         nic_ip = ip
7976
7977       # TODO: check the ip address for uniqueness
7978       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7979         raise errors.OpPrereqError("Routed nic mode requires an ip address",
7980                                    errors.ECODE_INVAL)
7981
7982       # MAC address verification
7983       mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
7984       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7985         mac = utils.NormalizeAndValidateMac(mac)
7986
7987         try:
7988           self.cfg.ReserveMAC(mac, self.proc.GetECId())
7989         except errors.ReservationError:
7990           raise errors.OpPrereqError("MAC address %s already in use"
7991                                      " in cluster" % mac,
7992                                      errors.ECODE_NOTUNIQUE)
7993
7994       #  Build nic parameters
7995       link = nic.get(constants.INIC_LINK, None)
7996       nicparams = {}
7997       if nic_mode_req:
7998         nicparams[constants.NIC_MODE] = nic_mode_req
7999       if link:
8000         nicparams[constants.NIC_LINK] = link
8001
8002       check_params = cluster.SimpleFillNIC(nicparams)
8003       objects.NIC.CheckParameterSyntax(check_params)
8004       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8005
8006     # disk checks/pre-build
8007     default_vg = self.cfg.GetVGName()
8008     self.disks = []
8009     for disk in self.op.disks:
8010       mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8011       if mode not in constants.DISK_ACCESS_SET:
8012         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8013                                    mode, errors.ECODE_INVAL)
8014       size = disk.get(constants.IDISK_SIZE, None)
8015       if size is None:
8016         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8017       try:
8018         size = int(size)
8019       except (TypeError, ValueError):
8020         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8021                                    errors.ECODE_INVAL)
8022
8023       data_vg = disk.get(constants.IDISK_VG, default_vg)
8024       new_disk = {
8025         constants.IDISK_SIZE: size,
8026         constants.IDISK_MODE: mode,
8027         constants.IDISK_VG: data_vg,
8028         constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8029         }
8030       if constants.IDISK_ADOPT in disk:
8031         new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8032       self.disks.append(new_disk)
8033
8034     if self.op.mode == constants.INSTANCE_IMPORT:
8035
8036       # Check that the new instance doesn't have less disks than the export
8037       instance_disks = len(self.disks)
8038       export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
8039       if instance_disks < export_disks:
8040         raise errors.OpPrereqError("Not enough disks to import."
8041                                    " (instance: %d, export: %d)" %
8042                                    (instance_disks, export_disks),
8043                                    errors.ECODE_INVAL)
8044
8045       disk_images = []
8046       for idx in range(export_disks):
8047         option = 'disk%d_dump' % idx
8048         if export_info.has_option(constants.INISECT_INS, option):
8049           # FIXME: are the old os-es, disk sizes, etc. useful?
8050           export_name = export_info.get(constants.INISECT_INS, option)
8051           image = utils.PathJoin(self.op.src_path, export_name)
8052           disk_images.append(image)
8053         else:
8054           disk_images.append(False)
8055
8056       self.src_images = disk_images
8057
8058       old_name = export_info.get(constants.INISECT_INS, 'name')
8059       try:
8060         exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
8061       except (TypeError, ValueError), err:
8062         raise errors.OpPrereqError("Invalid export file, nic_count is not"
8063                                    " an integer: %s" % str(err),
8064                                    errors.ECODE_STATE)
8065       if self.op.instance_name == old_name:
8066         for idx, nic in enumerate(self.nics):
8067           if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8068             nic_mac_ini = 'nic%d_mac' % idx
8069             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8070
8071     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8072
8073     # ip ping checks (we use the same ip that was resolved in ExpandNames)
8074     if self.op.ip_check:
8075       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8076         raise errors.OpPrereqError("IP %s of instance %s already in use" %
8077                                    (self.check_ip, self.op.instance_name),
8078                                    errors.ECODE_NOTUNIQUE)
8079
8080     #### mac address generation
8081     # By generating here the mac address both the allocator and the hooks get
8082     # the real final mac address rather than the 'auto' or 'generate' value.
8083     # There is a race condition between the generation and the instance object
8084     # creation, which means that we know the mac is valid now, but we're not
8085     # sure it will be when we actually add the instance. If things go bad
8086     # adding the instance will abort because of a duplicate mac, and the
8087     # creation job will fail.
8088     for nic in self.nics:
8089       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8090         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8091
8092     #### allocator run
8093
8094     if self.op.iallocator is not None:
8095       self._RunAllocator()
8096
8097     #### node related checks
8098
8099     # check primary node
8100     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8101     assert self.pnode is not None, \
8102       "Cannot retrieve locked node %s" % self.op.pnode
8103     if pnode.offline:
8104       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8105                                  pnode.name, errors.ECODE_STATE)
8106     if pnode.drained:
8107       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8108                                  pnode.name, errors.ECODE_STATE)
8109     if not pnode.vm_capable:
8110       raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8111                                  " '%s'" % pnode.name, errors.ECODE_STATE)
8112
8113     self.secondaries = []
8114
8115     # mirror node verification
8116     if self.op.disk_template in constants.DTS_INT_MIRROR:
8117       if self.op.snode == pnode.name:
8118         raise errors.OpPrereqError("The secondary node cannot be the"
8119                                    " primary node", errors.ECODE_INVAL)
8120       _CheckNodeOnline(self, self.op.snode)
8121       _CheckNodeNotDrained(self, self.op.snode)
8122       _CheckNodeVmCapable(self, self.op.snode)
8123       self.secondaries.append(self.op.snode)
8124
8125     nodenames = [pnode.name] + self.secondaries
8126
8127     if not self.adopt_disks:
8128       # Check lv size requirements, if not adopting
8129       req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8130       _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8131
8132     elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8133       all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8134                                 disk[constants.IDISK_ADOPT])
8135                      for disk in self.disks])
8136       if len(all_lvs) != len(self.disks):
8137         raise errors.OpPrereqError("Duplicate volume names given for adoption",
8138                                    errors.ECODE_INVAL)
8139       for lv_name in all_lvs:
8140         try:
8141           # FIXME: lv_name here is "vg/lv" need to ensure that other calls
8142           # to ReserveLV uses the same syntax
8143           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8144         except errors.ReservationError:
8145           raise errors.OpPrereqError("LV named %s used by another instance" %
8146                                      lv_name, errors.ECODE_NOTUNIQUE)
8147
8148       vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8149       vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8150
8151       node_lvs = self.rpc.call_lv_list([pnode.name],
8152                                        vg_names.payload.keys())[pnode.name]
8153       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8154       node_lvs = node_lvs.payload
8155
8156       delta = all_lvs.difference(node_lvs.keys())
8157       if delta:
8158         raise errors.OpPrereqError("Missing logical volume(s): %s" %
8159                                    utils.CommaJoin(delta),
8160                                    errors.ECODE_INVAL)
8161       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8162       if online_lvs:
8163         raise errors.OpPrereqError("Online logical volumes found, cannot"
8164                                    " adopt: %s" % utils.CommaJoin(online_lvs),
8165                                    errors.ECODE_STATE)
8166       # update the size of disk based on what is found
8167       for dsk in self.disks:
8168         dsk[constants.IDISK_SIZE] = \
8169           int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8170                                         dsk[constants.IDISK_ADOPT])][0]))
8171
8172     elif self.op.disk_template == constants.DT_BLOCK:
8173       # Normalize and de-duplicate device paths
8174       all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8175                        for disk in self.disks])
8176       if len(all_disks) != len(self.disks):
8177         raise errors.OpPrereqError("Duplicate disk names given for adoption",
8178                                    errors.ECODE_INVAL)
8179       baddisks = [d for d in all_disks
8180                   if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8181       if baddisks:
8182         raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8183                                    " cannot be adopted" %
8184                                    (", ".join(baddisks),
8185                                     constants.ADOPTABLE_BLOCKDEV_ROOT),
8186                                    errors.ECODE_INVAL)
8187
8188       node_disks = self.rpc.call_bdev_sizes([pnode.name],
8189                                             list(all_disks))[pnode.name]
8190       node_disks.Raise("Cannot get block device information from node %s" %
8191                        pnode.name)
8192       node_disks = node_disks.payload
8193       delta = all_disks.difference(node_disks.keys())
8194       if delta:
8195         raise errors.OpPrereqError("Missing block device(s): %s" %
8196                                    utils.CommaJoin(delta),
8197                                    errors.ECODE_INVAL)
8198       for dsk in self.disks:
8199         dsk[constants.IDISK_SIZE] = \
8200           int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8201
8202     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8203
8204     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8205     # check OS parameters (remotely)
8206     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8207
8208     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8209
8210     # memory check on primary node
8211     if self.op.start:
8212       _CheckNodeFreeMemory(self, self.pnode.name,
8213                            "creating instance %s" % self.op.instance_name,
8214                            self.be_full[constants.BE_MEMORY],
8215                            self.op.hypervisor)
8216
8217     self.dry_run_result = list(nodenames)
8218
8219   def Exec(self, feedback_fn):
8220     """Create and add the instance to the cluster.
8221
8222     """
8223     instance = self.op.instance_name
8224     pnode_name = self.pnode.name
8225
8226     ht_kind = self.op.hypervisor
8227     if ht_kind in constants.HTS_REQ_PORT:
8228       network_port = self.cfg.AllocatePort()
8229     else:
8230       network_port = None
8231
8232     if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
8233       # this is needed because os.path.join does not accept None arguments
8234       if self.op.file_storage_dir is None:
8235         string_file_storage_dir = ""
8236       else:
8237         string_file_storage_dir = self.op.file_storage_dir
8238
8239       # build the full file storage dir path
8240       if self.op.disk_template == constants.DT_SHARED_FILE:
8241         get_fsd_fn = self.cfg.GetSharedFileStorageDir
8242       else:
8243         get_fsd_fn = self.cfg.GetFileStorageDir
8244
8245       file_storage_dir = utils.PathJoin(get_fsd_fn(),
8246                                         string_file_storage_dir, instance)
8247     else:
8248       file_storage_dir = ""
8249
8250     disks = _GenerateDiskTemplate(self,
8251                                   self.op.disk_template,
8252                                   instance, pnode_name,
8253                                   self.secondaries,
8254                                   self.disks,
8255                                   file_storage_dir,
8256                                   self.op.file_driver,
8257                                   0,
8258                                   feedback_fn)
8259
8260     iobj = objects.Instance(name=instance, os=self.op.os_type,
8261                             primary_node=pnode_name,
8262                             nics=self.nics, disks=disks,
8263                             disk_template=self.op.disk_template,
8264                             admin_up=False,
8265                             network_port=network_port,
8266                             beparams=self.op.beparams,
8267                             hvparams=self.op.hvparams,
8268                             hypervisor=self.op.hypervisor,
8269                             osparams=self.op.osparams,
8270                             )
8271
8272     if self.adopt_disks:
8273       if self.op.disk_template == constants.DT_PLAIN:
8274         # rename LVs to the newly-generated names; we need to construct
8275         # 'fake' LV disks with the old data, plus the new unique_id
8276         tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8277         rename_to = []
8278         for t_dsk, a_dsk in zip (tmp_disks, self.disks):
8279           rename_to.append(t_dsk.logical_id)
8280           t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8281           self.cfg.SetDiskID(t_dsk, pnode_name)
8282         result = self.rpc.call_blockdev_rename(pnode_name,
8283                                                zip(tmp_disks, rename_to))
8284         result.Raise("Failed to rename adoped LVs")
8285     else:
8286       feedback_fn("* creating instance disks...")
8287       try:
8288         _CreateDisks(self, iobj)
8289       except errors.OpExecError:
8290         self.LogWarning("Device creation failed, reverting...")
8291         try:
8292           _RemoveDisks(self, iobj)
8293         finally:
8294           self.cfg.ReleaseDRBDMinors(instance)
8295           raise
8296
8297     feedback_fn("adding instance %s to cluster config" % instance)
8298
8299     self.cfg.AddInstance(iobj, self.proc.GetECId())
8300
8301     # Declare that we don't want to remove the instance lock anymore, as we've
8302     # added the instance to the config
8303     del self.remove_locks[locking.LEVEL_INSTANCE]
8304
8305     if self.op.mode == constants.INSTANCE_IMPORT:
8306       # Release unused nodes
8307       _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
8308     else:
8309       # Release all nodes
8310       _ReleaseLocks(self, locking.LEVEL_NODE)
8311
8312     disk_abort = False
8313     if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
8314       feedback_fn("* wiping instance disks...")
8315       try:
8316         _WipeDisks(self, iobj)
8317       except errors.OpExecError, err:
8318         logging.exception("Wiping disks failed")
8319         self.LogWarning("Wiping instance disks failed (%s)", err)
8320         disk_abort = True
8321
8322     if disk_abort:
8323       # Something is already wrong with the disks, don't do anything else
8324       pass
8325     elif self.op.wait_for_sync:
8326       disk_abort = not _WaitForSync(self, iobj)
8327     elif iobj.disk_template in constants.DTS_INT_MIRROR:
8328       # make sure the disks are not degraded (still sync-ing is ok)
8329       time.sleep(15)
8330       feedback_fn("* checking mirrors status")
8331       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8332     else:
8333       disk_abort = False
8334
8335     if disk_abort:
8336       _RemoveDisks(self, iobj)
8337       self.cfg.RemoveInstance(iobj.name)
8338       # Make sure the instance lock gets removed
8339       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8340       raise errors.OpExecError("There are some degraded disks for"
8341                                " this instance")
8342
8343     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
8344       if self.op.mode == constants.INSTANCE_CREATE:
8345         if not self.op.no_install:
8346           feedback_fn("* running the instance OS create scripts...")
8347           # FIXME: pass debug option from opcode to backend
8348           result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
8349                                                  self.op.debug_level)
8350           result.Raise("Could not add os for instance %s"
8351                        " on node %s" % (instance, pnode_name))
8352
8353       elif self.op.mode == constants.INSTANCE_IMPORT:
8354         feedback_fn("* running the instance OS import scripts...")
8355
8356         transfers = []
8357
8358         for idx, image in enumerate(self.src_images):
8359           if not image:
8360             continue
8361
8362           # FIXME: pass debug option from opcode to backend
8363           dt = masterd.instance.DiskTransfer("disk/%s" % idx,
8364                                              constants.IEIO_FILE, (image, ),
8365                                              constants.IEIO_SCRIPT,
8366                                              (iobj.disks[idx], idx),
8367                                              None)
8368           transfers.append(dt)
8369
8370         import_result = \
8371           masterd.instance.TransferInstanceData(self, feedback_fn,
8372                                                 self.op.src_node, pnode_name,
8373                                                 self.pnode.secondary_ip,
8374                                                 iobj, transfers)
8375         if not compat.all(import_result):
8376           self.LogWarning("Some disks for instance %s on node %s were not"
8377                           " imported successfully" % (instance, pnode_name))
8378
8379       elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8380         feedback_fn("* preparing remote import...")
8381         # The source cluster will stop the instance before attempting to make a
8382         # connection. In some cases stopping an instance can take a long time,
8383         # hence the shutdown timeout is added to the connection timeout.
8384         connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
8385                            self.op.source_shutdown_timeout)
8386         timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
8387
8388         assert iobj.primary_node == self.pnode.name
8389         disk_results = \
8390           masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
8391                                         self.source_x509_ca,
8392                                         self._cds, timeouts)
8393         if not compat.all(disk_results):
8394           # TODO: Should the instance still be started, even if some disks
8395           # failed to import (valid for local imports, too)?
8396           self.LogWarning("Some disks for instance %s on node %s were not"
8397                           " imported successfully" % (instance, pnode_name))
8398
8399         # Run rename script on newly imported instance
8400         assert iobj.name == instance
8401         feedback_fn("Running rename script for %s" % instance)
8402         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
8403                                                    self.source_instance_name,
8404                                                    self.op.debug_level)
8405         if result.fail_msg:
8406           self.LogWarning("Failed to run rename script for %s on node"
8407                           " %s: %s" % (instance, pnode_name, result.fail_msg))
8408
8409       else:
8410         # also checked in the prereq part
8411         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
8412                                      % self.op.mode)
8413
8414     if self.op.start:
8415       iobj.admin_up = True
8416       self.cfg.Update(iobj, feedback_fn)
8417       logging.info("Starting instance %s on node %s", instance, pnode_name)
8418       feedback_fn("* starting instance...")
8419       result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
8420       result.Raise("Could not start instance")
8421
8422     return list(iobj.all_nodes)
8423
8424
8425 class LUInstanceConsole(NoHooksLU):
8426   """Connect to an instance's console.
8427
8428   This is somewhat special in that it returns the command line that
8429   you need to run on the master node in order to connect to the
8430   console.
8431
8432   """
8433   REQ_BGL = False
8434
8435   def ExpandNames(self):
8436     self._ExpandAndLockInstance()
8437
8438   def CheckPrereq(self):
8439     """Check prerequisites.
8440
8441     This checks that the instance is in the cluster.
8442
8443     """
8444     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8445     assert self.instance is not None, \
8446       "Cannot retrieve locked instance %s" % self.op.instance_name
8447     _CheckNodeOnline(self, self.instance.primary_node)
8448
8449   def Exec(self, feedback_fn):
8450     """Connect to the console of an instance
8451
8452     """
8453     instance = self.instance
8454     node = instance.primary_node
8455
8456     node_insts = self.rpc.call_instance_list([node],
8457                                              [instance.hypervisor])[node]
8458     node_insts.Raise("Can't get node information from %s" % node)
8459
8460     if instance.name not in node_insts.payload:
8461       if instance.admin_up:
8462         state = constants.INSTST_ERRORDOWN
8463       else:
8464         state = constants.INSTST_ADMINDOWN
8465       raise errors.OpExecError("Instance %s is not running (state %s)" %
8466                                (instance.name, state))
8467
8468     logging.debug("Connecting to console of %s on %s", instance.name, node)
8469
8470     return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
8471
8472
8473 def _GetInstanceConsole(cluster, instance):
8474   """Returns console information for an instance.
8475
8476   @type cluster: L{objects.Cluster}
8477   @type instance: L{objects.Instance}
8478   @rtype: dict
8479
8480   """
8481   hyper = hypervisor.GetHypervisor(instance.hypervisor)
8482   # beparams and hvparams are passed separately, to avoid editing the
8483   # instance and then saving the defaults in the instance itself.
8484   hvparams = cluster.FillHV(instance)
8485   beparams = cluster.FillBE(instance)
8486   console = hyper.GetInstanceConsole(instance, hvparams, beparams)
8487
8488   assert console.instance == instance.name
8489   assert console.Validate()
8490
8491   return console.ToDict()
8492
8493
8494 class LUInstanceReplaceDisks(LogicalUnit):
8495   """Replace the disks of an instance.
8496
8497   """
8498   HPATH = "mirrors-replace"
8499   HTYPE = constants.HTYPE_INSTANCE
8500   REQ_BGL = False
8501
8502   def CheckArguments(self):
8503     TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
8504                                   self.op.iallocator)
8505
8506   def ExpandNames(self):
8507     self._ExpandAndLockInstance()
8508
8509     assert locking.LEVEL_NODE not in self.needed_locks
8510     assert locking.LEVEL_NODEGROUP not in self.needed_locks
8511
8512     assert self.op.iallocator is None or self.op.remote_node is None, \
8513       "Conflicting options"
8514
8515     if self.op.remote_node is not None:
8516       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8517
8518       # Warning: do not remove the locking of the new secondary here
8519       # unless DRBD8.AddChildren is changed to work in parallel;
8520       # currently it doesn't since parallel invocations of
8521       # FindUnusedMinor will conflict
8522       self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
8523       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8524     else:
8525       self.needed_locks[locking.LEVEL_NODE] = []
8526       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8527
8528       if self.op.iallocator is not None:
8529         # iallocator will select a new node in the same group
8530         self.needed_locks[locking.LEVEL_NODEGROUP] = []
8531
8532     self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
8533                                    self.op.iallocator, self.op.remote_node,
8534                                    self.op.disks, False, self.op.early_release)
8535
8536     self.tasklets = [self.replacer]
8537
8538   def DeclareLocks(self, level):
8539     if level == locking.LEVEL_NODEGROUP:
8540       assert self.op.remote_node is None
8541       assert self.op.iallocator is not None
8542       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
8543
8544       self.share_locks[locking.LEVEL_NODEGROUP] = 1
8545       self.needed_locks[locking.LEVEL_NODEGROUP] = \
8546         self.cfg.GetInstanceNodeGroups(self.op.instance_name)
8547
8548     elif level == locking.LEVEL_NODE:
8549       if self.op.iallocator is not None:
8550         assert self.op.remote_node is None
8551         assert not self.needed_locks[locking.LEVEL_NODE]
8552
8553         # Lock member nodes of all locked groups
8554         self.needed_locks[locking.LEVEL_NODE] = [node_name
8555           for group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
8556           for node_name in self.cfg.GetNodeGroup(group_uuid).members]
8557       else:
8558         self._LockInstancesNodes()
8559
8560   def BuildHooksEnv(self):
8561     """Build hooks env.
8562
8563     This runs on the master, the primary and all the secondaries.
8564
8565     """
8566     instance = self.replacer.instance
8567     env = {
8568       "MODE": self.op.mode,
8569       "NEW_SECONDARY": self.op.remote_node,
8570       "OLD_SECONDARY": instance.secondary_nodes[0],
8571       }
8572     env.update(_BuildInstanceHookEnvByObject(self, instance))
8573     return env
8574
8575   def BuildHooksNodes(self):
8576     """Build hooks nodes.
8577
8578     """
8579     instance = self.replacer.instance
8580     nl = [
8581       self.cfg.GetMasterNode(),
8582       instance.primary_node,
8583       ]
8584     if self.op.remote_node is not None:
8585       nl.append(self.op.remote_node)
8586     return nl, nl
8587
8588   def CheckPrereq(self):
8589     """Check prerequisites.
8590
8591     """
8592     assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
8593             self.op.iallocator is None)
8594
8595     owned_groups = self.glm.list_owned(locking.LEVEL_NODEGROUP)
8596     if owned_groups:
8597       groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
8598       if owned_groups != groups:
8599         raise errors.OpExecError("Node groups used by instance '%s' changed"
8600                                  " since lock was acquired, current list is %r,"
8601                                  " used to be '%s'" %
8602                                  (self.op.instance_name,
8603                                   utils.CommaJoin(groups),
8604                                   utils.CommaJoin(owned_groups)))
8605
8606     return LogicalUnit.CheckPrereq(self)
8607
8608
8609 class TLReplaceDisks(Tasklet):
8610   """Replaces disks for an instance.
8611
8612   Note: Locking is not within the scope of this class.
8613
8614   """
8615   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
8616                disks, delay_iallocator, early_release):
8617     """Initializes this class.
8618
8619     """
8620     Tasklet.__init__(self, lu)
8621
8622     # Parameters
8623     self.instance_name = instance_name
8624     self.mode = mode
8625     self.iallocator_name = iallocator_name
8626     self.remote_node = remote_node
8627     self.disks = disks
8628     self.delay_iallocator = delay_iallocator
8629     self.early_release = early_release
8630
8631     # Runtime data
8632     self.instance = None
8633     self.new_node = None
8634     self.target_node = None
8635     self.other_node = None
8636     self.remote_node_info = None
8637     self.node_secondary_ip = None
8638
8639   @staticmethod
8640   def CheckArguments(mode, remote_node, iallocator):
8641     """Helper function for users of this class.
8642
8643     """
8644     # check for valid parameter combination
8645     if mode == constants.REPLACE_DISK_CHG:
8646       if remote_node is None and iallocator is None:
8647         raise errors.OpPrereqError("When changing the secondary either an"
8648                                    " iallocator script must be used or the"
8649                                    " new node given", errors.ECODE_INVAL)
8650
8651       if remote_node is not None and iallocator is not None:
8652         raise errors.OpPrereqError("Give either the iallocator or the new"
8653                                    " secondary, not both", errors.ECODE_INVAL)
8654
8655     elif remote_node is not None or iallocator is not None:
8656       # Not replacing the secondary
8657       raise errors.OpPrereqError("The iallocator and new node options can"
8658                                  " only be used when changing the"
8659                                  " secondary node", errors.ECODE_INVAL)
8660
8661   @staticmethod
8662   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8663     """Compute a new secondary node using an IAllocator.
8664
8665     """
8666     ial = IAllocator(lu.cfg, lu.rpc,
8667                      mode=constants.IALLOCATOR_MODE_RELOC,
8668                      name=instance_name,
8669                      relocate_from=relocate_from)
8670
8671     ial.Run(iallocator_name)
8672
8673     if not ial.success:
8674       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
8675                                  " %s" % (iallocator_name, ial.info),
8676                                  errors.ECODE_NORES)
8677
8678     if len(ial.result) != ial.required_nodes:
8679       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8680                                  " of nodes (%s), required %s" %
8681                                  (iallocator_name,
8682                                   len(ial.result), ial.required_nodes),
8683                                  errors.ECODE_FAULT)
8684
8685     remote_node_name = ial.result[0]
8686
8687     lu.LogInfo("Selected new secondary for instance '%s': %s",
8688                instance_name, remote_node_name)
8689
8690     return remote_node_name
8691
8692   def _FindFaultyDisks(self, node_name):
8693     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
8694                                     node_name, True)
8695
8696   def _CheckDisksActivated(self, instance):
8697     """Checks if the instance disks are activated.
8698
8699     @param instance: The instance to check disks
8700     @return: True if they are activated, False otherwise
8701
8702     """
8703     nodes = instance.all_nodes
8704
8705     for idx, dev in enumerate(instance.disks):
8706       for node in nodes:
8707         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
8708         self.cfg.SetDiskID(dev, node)
8709
8710         result = self.rpc.call_blockdev_find(node, dev)
8711
8712         if result.offline:
8713           continue
8714         elif result.fail_msg or not result.payload:
8715           return False
8716
8717     return True
8718
8719   def CheckPrereq(self):
8720     """Check prerequisites.
8721
8722     This checks that the instance is in the cluster.
8723
8724     """
8725     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
8726     assert instance is not None, \
8727       "Cannot retrieve locked instance %s" % self.instance_name
8728
8729     if instance.disk_template != constants.DT_DRBD8:
8730       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
8731                                  " instances", errors.ECODE_INVAL)
8732
8733     if len(instance.secondary_nodes) != 1:
8734       raise errors.OpPrereqError("The instance has a strange layout,"
8735                                  " expected one secondary but found %d" %
8736                                  len(instance.secondary_nodes),
8737                                  errors.ECODE_FAULT)
8738
8739     if not self.delay_iallocator:
8740       self._CheckPrereq2()
8741
8742   def _CheckPrereq2(self):
8743     """Check prerequisites, second part.
8744
8745     This function should always be part of CheckPrereq. It was separated and is
8746     now called from Exec because during node evacuation iallocator was only
8747     called with an unmodified cluster model, not taking planned changes into
8748     account.
8749
8750     """
8751     instance = self.instance
8752     secondary_node = instance.secondary_nodes[0]
8753
8754     if self.iallocator_name is None:
8755       remote_node = self.remote_node
8756     else:
8757       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
8758                                        instance.name, instance.secondary_nodes)
8759
8760     if remote_node is None:
8761       self.remote_node_info = None
8762     else:
8763       assert remote_node in self.lu.glm.list_owned(locking.LEVEL_NODE), \
8764              "Remote node '%s' is not locked" % remote_node
8765
8766       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
8767       assert self.remote_node_info is not None, \
8768         "Cannot retrieve locked node %s" % remote_node
8769
8770     if remote_node == self.instance.primary_node:
8771       raise errors.OpPrereqError("The specified node is the primary node of"
8772                                  " the instance", errors.ECODE_INVAL)
8773
8774     if remote_node == secondary_node:
8775       raise errors.OpPrereqError("The specified node is already the"
8776                                  " secondary node of the instance",
8777                                  errors.ECODE_INVAL)
8778
8779     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
8780                                     constants.REPLACE_DISK_CHG):
8781       raise errors.OpPrereqError("Cannot specify disks to be replaced",
8782                                  errors.ECODE_INVAL)
8783
8784     if self.mode == constants.REPLACE_DISK_AUTO:
8785       if not self._CheckDisksActivated(instance):
8786         raise errors.OpPrereqError("Please run activate-disks on instance %s"
8787                                    " first" % self.instance_name,
8788                                    errors.ECODE_STATE)
8789       faulty_primary = self._FindFaultyDisks(instance.primary_node)
8790       faulty_secondary = self._FindFaultyDisks(secondary_node)
8791
8792       if faulty_primary and faulty_secondary:
8793         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
8794                                    " one node and can not be repaired"
8795                                    " automatically" % self.instance_name,
8796                                    errors.ECODE_STATE)
8797
8798       if faulty_primary:
8799         self.disks = faulty_primary
8800         self.target_node = instance.primary_node
8801         self.other_node = secondary_node
8802         check_nodes = [self.target_node, self.other_node]
8803       elif faulty_secondary:
8804         self.disks = faulty_secondary
8805         self.target_node = secondary_node
8806         self.other_node = instance.primary_node
8807         check_nodes = [self.target_node, self.other_node]
8808       else:
8809         self.disks = []
8810         check_nodes = []
8811
8812     else:
8813       # Non-automatic modes
8814       if self.mode == constants.REPLACE_DISK_PRI:
8815         self.target_node = instance.primary_node
8816         self.other_node = secondary_node
8817         check_nodes = [self.target_node, self.other_node]
8818
8819       elif self.mode == constants.REPLACE_DISK_SEC:
8820         self.target_node = secondary_node
8821         self.other_node = instance.primary_node
8822         check_nodes = [self.target_node, self.other_node]
8823
8824       elif self.mode == constants.REPLACE_DISK_CHG:
8825         self.new_node = remote_node
8826         self.other_node = instance.primary_node
8827         self.target_node = secondary_node
8828         check_nodes = [self.new_node, self.other_node]
8829
8830         _CheckNodeNotDrained(self.lu, remote_node)
8831         _CheckNodeVmCapable(self.lu, remote_node)
8832
8833         old_node_info = self.cfg.GetNodeInfo(secondary_node)
8834         assert old_node_info is not None
8835         if old_node_info.offline and not self.early_release:
8836           # doesn't make sense to delay the release
8837           self.early_release = True
8838           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8839                           " early-release mode", secondary_node)
8840
8841       else:
8842         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8843                                      self.mode)
8844
8845       # If not specified all disks should be replaced
8846       if not self.disks:
8847         self.disks = range(len(self.instance.disks))
8848
8849     for node in check_nodes:
8850       _CheckNodeOnline(self.lu, node)
8851
8852     touched_nodes = frozenset(node_name for node_name in [self.new_node,
8853                                                           self.other_node,
8854                                                           self.target_node]
8855                               if node_name is not None)
8856
8857     # Release unneeded node locks
8858     _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
8859
8860     # Release any owned node group
8861     if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
8862       _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
8863
8864     # Check whether disks are valid
8865     for disk_idx in self.disks:
8866       instance.FindDisk(disk_idx)
8867
8868     # Get secondary node IP addresses
8869     self.node_secondary_ip = \
8870       dict((node_name, self.cfg.GetNodeInfo(node_name).secondary_ip)
8871            for node_name in touched_nodes)
8872
8873   def Exec(self, feedback_fn):
8874     """Execute disk replacement.
8875
8876     This dispatches the disk replacement to the appropriate handler.
8877
8878     """
8879     if self.delay_iallocator:
8880       self._CheckPrereq2()
8881
8882     if __debug__:
8883       # Verify owned locks before starting operation
8884       owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
8885       assert set(owned_locks) == set(self.node_secondary_ip), \
8886           ("Incorrect node locks, owning %s, expected %s" %
8887            (owned_locks, self.node_secondary_ip.keys()))
8888
8889       owned_locks = self.lu.glm.list_owned(locking.LEVEL_INSTANCE)
8890       assert list(owned_locks) == [self.instance_name], \
8891           "Instance '%s' not locked" % self.instance_name
8892
8893       assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
8894           "Should not own any node group lock at this point"
8895
8896     if not self.disks:
8897       feedback_fn("No disks need replacement")
8898       return
8899
8900     feedback_fn("Replacing disk(s) %s for %s" %
8901                 (utils.CommaJoin(self.disks), self.instance.name))
8902
8903     activate_disks = (not self.instance.admin_up)
8904
8905     # Activate the instance disks if we're replacing them on a down instance
8906     if activate_disks:
8907       _StartInstanceDisks(self.lu, self.instance, True)
8908
8909     try:
8910       # Should we replace the secondary node?
8911       if self.new_node is not None:
8912         fn = self._ExecDrbd8Secondary
8913       else:
8914         fn = self._ExecDrbd8DiskOnly
8915
8916       result = fn(feedback_fn)
8917     finally:
8918       # Deactivate the instance disks if we're replacing them on a
8919       # down instance
8920       if activate_disks:
8921         _SafeShutdownInstanceDisks(self.lu, self.instance)
8922
8923     if __debug__:
8924       # Verify owned locks
8925       owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
8926       nodes = frozenset(self.node_secondary_ip)
8927       assert ((self.early_release and not owned_locks) or
8928               (not self.early_release and not (set(owned_locks) - nodes))), \
8929         ("Not owning the correct locks, early_release=%s, owned=%r,"
8930          " nodes=%r" % (self.early_release, owned_locks, nodes))
8931
8932     return result
8933
8934   def _CheckVolumeGroup(self, nodes):
8935     self.lu.LogInfo("Checking volume groups")
8936
8937     vgname = self.cfg.GetVGName()
8938
8939     # Make sure volume group exists on all involved nodes
8940     results = self.rpc.call_vg_list(nodes)
8941     if not results:
8942       raise errors.OpExecError("Can't list volume groups on the nodes")
8943
8944     for node in nodes:
8945       res = results[node]
8946       res.Raise("Error checking node %s" % node)
8947       if vgname not in res.payload:
8948         raise errors.OpExecError("Volume group '%s' not found on node %s" %
8949                                  (vgname, node))
8950
8951   def _CheckDisksExistence(self, nodes):
8952     # Check disk existence
8953     for idx, dev in enumerate(self.instance.disks):
8954       if idx not in self.disks:
8955         continue
8956
8957       for node in nodes:
8958         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8959         self.cfg.SetDiskID(dev, node)
8960
8961         result = self.rpc.call_blockdev_find(node, dev)
8962
8963         msg = result.fail_msg
8964         if msg or not result.payload:
8965           if not msg:
8966             msg = "disk not found"
8967           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8968                                    (idx, node, msg))
8969
8970   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8971     for idx, dev in enumerate(self.instance.disks):
8972       if idx not in self.disks:
8973         continue
8974
8975       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8976                       (idx, node_name))
8977
8978       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8979                                    ldisk=ldisk):
8980         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8981                                  " replace disks for instance %s" %
8982                                  (node_name, self.instance.name))
8983
8984   def _CreateNewStorage(self, node_name):
8985     iv_names = {}
8986
8987     for idx, dev in enumerate(self.instance.disks):
8988       if idx not in self.disks:
8989         continue
8990
8991       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8992
8993       self.cfg.SetDiskID(dev, node_name)
8994
8995       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8996       names = _GenerateUniqueNames(self.lu, lv_names)
8997
8998       vg_data = dev.children[0].logical_id[0]
8999       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9000                              logical_id=(vg_data, names[0]))
9001       vg_meta = dev.children[1].logical_id[0]
9002       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9003                              logical_id=(vg_meta, names[1]))
9004
9005       new_lvs = [lv_data, lv_meta]
9006       old_lvs = dev.children
9007       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9008
9009       # we pass force_create=True to force the LVM creation
9010       for new_lv in new_lvs:
9011         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9012                         _GetInstanceInfoText(self.instance), False)
9013
9014     return iv_names
9015
9016   def _CheckDevices(self, node_name, iv_names):
9017     for name, (dev, _, _) in iv_names.iteritems():
9018       self.cfg.SetDiskID(dev, node_name)
9019
9020       result = self.rpc.call_blockdev_find(node_name, dev)
9021
9022       msg = result.fail_msg
9023       if msg or not result.payload:
9024         if not msg:
9025           msg = "disk not found"
9026         raise errors.OpExecError("Can't find DRBD device %s: %s" %
9027                                  (name, msg))
9028
9029       if result.payload.is_degraded:
9030         raise errors.OpExecError("DRBD device %s is degraded!" % name)
9031
9032   def _RemoveOldStorage(self, node_name, iv_names):
9033     for name, (_, old_lvs, _) in iv_names.iteritems():
9034       self.lu.LogInfo("Remove logical volumes for %s" % name)
9035
9036       for lv in old_lvs:
9037         self.cfg.SetDiskID(lv, node_name)
9038
9039         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9040         if msg:
9041           self.lu.LogWarning("Can't remove old LV: %s" % msg,
9042                              hint="remove unused LVs manually")
9043
9044   def _ExecDrbd8DiskOnly(self, feedback_fn):
9045     """Replace a disk on the primary or secondary for DRBD 8.
9046
9047     The algorithm for replace is quite complicated:
9048
9049       1. for each disk to be replaced:
9050
9051         1. create new LVs on the target node with unique names
9052         1. detach old LVs from the drbd device
9053         1. rename old LVs to name_replaced.<time_t>
9054         1. rename new LVs to old LVs
9055         1. attach the new LVs (with the old names now) to the drbd device
9056
9057       1. wait for sync across all devices
9058
9059       1. for each modified disk:
9060
9061         1. remove old LVs (which have the name name_replaces.<time_t>)
9062
9063     Failures are not very well handled.
9064
9065     """
9066     steps_total = 6
9067
9068     # Step: check device activation
9069     self.lu.LogStep(1, steps_total, "Check device existence")
9070     self._CheckDisksExistence([self.other_node, self.target_node])
9071     self._CheckVolumeGroup([self.target_node, self.other_node])
9072
9073     # Step: check other node consistency
9074     self.lu.LogStep(2, steps_total, "Check peer consistency")
9075     self._CheckDisksConsistency(self.other_node,
9076                                 self.other_node == self.instance.primary_node,
9077                                 False)
9078
9079     # Step: create new storage
9080     self.lu.LogStep(3, steps_total, "Allocate new storage")
9081     iv_names = self._CreateNewStorage(self.target_node)
9082
9083     # Step: for each lv, detach+rename*2+attach
9084     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9085     for dev, old_lvs, new_lvs in iv_names.itervalues():
9086       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9087
9088       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9089                                                      old_lvs)
9090       result.Raise("Can't detach drbd from local storage on node"
9091                    " %s for device %s" % (self.target_node, dev.iv_name))
9092       #dev.children = []
9093       #cfg.Update(instance)
9094
9095       # ok, we created the new LVs, so now we know we have the needed
9096       # storage; as such, we proceed on the target node to rename
9097       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9098       # using the assumption that logical_id == physical_id (which in
9099       # turn is the unique_id on that node)
9100
9101       # FIXME(iustin): use a better name for the replaced LVs
9102       temp_suffix = int(time.time())
9103       ren_fn = lambda d, suff: (d.physical_id[0],
9104                                 d.physical_id[1] + "_replaced-%s" % suff)
9105
9106       # Build the rename list based on what LVs exist on the node
9107       rename_old_to_new = []
9108       for to_ren in old_lvs:
9109         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9110         if not result.fail_msg and result.payload:
9111           # device exists
9112           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9113
9114       self.lu.LogInfo("Renaming the old LVs on the target node")
9115       result = self.rpc.call_blockdev_rename(self.target_node,
9116                                              rename_old_to_new)
9117       result.Raise("Can't rename old LVs on node %s" % self.target_node)
9118
9119       # Now we rename the new LVs to the old LVs
9120       self.lu.LogInfo("Renaming the new LVs on the target node")
9121       rename_new_to_old = [(new, old.physical_id)
9122                            for old, new in zip(old_lvs, new_lvs)]
9123       result = self.rpc.call_blockdev_rename(self.target_node,
9124                                              rename_new_to_old)
9125       result.Raise("Can't rename new LVs on node %s" % self.target_node)
9126
9127       for old, new in zip(old_lvs, new_lvs):
9128         new.logical_id = old.logical_id
9129         self.cfg.SetDiskID(new, self.target_node)
9130
9131       for disk in old_lvs:
9132         disk.logical_id = ren_fn(disk, temp_suffix)
9133         self.cfg.SetDiskID(disk, self.target_node)
9134
9135       # Now that the new lvs have the old name, we can add them to the device
9136       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9137       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9138                                                   new_lvs)
9139       msg = result.fail_msg
9140       if msg:
9141         for new_lv in new_lvs:
9142           msg2 = self.rpc.call_blockdev_remove(self.target_node,
9143                                                new_lv).fail_msg
9144           if msg2:
9145             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9146                                hint=("cleanup manually the unused logical"
9147                                      "volumes"))
9148         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9149
9150       dev.children = new_lvs
9151
9152       self.cfg.Update(self.instance, feedback_fn)
9153
9154     cstep = 5
9155     if self.early_release:
9156       self.lu.LogStep(cstep, steps_total, "Removing old storage")
9157       cstep += 1
9158       self._RemoveOldStorage(self.target_node, iv_names)
9159       # WARNING: we release both node locks here, do not do other RPCs
9160       # than WaitForSync to the primary node
9161       _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9162                     names=[self.target_node, self.other_node])
9163
9164     # Wait for sync
9165     # This can fail as the old devices are degraded and _WaitForSync
9166     # does a combined result over all disks, so we don't check its return value
9167     self.lu.LogStep(cstep, steps_total, "Sync devices")
9168     cstep += 1
9169     _WaitForSync(self.lu, self.instance)
9170
9171     # Check all devices manually
9172     self._CheckDevices(self.instance.primary_node, iv_names)
9173
9174     # Step: remove old storage
9175     if not self.early_release:
9176       self.lu.LogStep(cstep, steps_total, "Removing old storage")
9177       cstep += 1
9178       self._RemoveOldStorage(self.target_node, iv_names)
9179
9180   def _ExecDrbd8Secondary(self, feedback_fn):
9181     """Replace the secondary node for DRBD 8.
9182
9183     The algorithm for replace is quite complicated:
9184       - for all disks of the instance:
9185         - create new LVs on the new node with same names
9186         - shutdown the drbd device on the old secondary
9187         - disconnect the drbd network on the primary
9188         - create the drbd device on the new secondary
9189         - network attach the drbd on the primary, using an artifice:
9190           the drbd code for Attach() will connect to the network if it
9191           finds a device which is connected to the good local disks but
9192           not network enabled
9193       - wait for sync across all devices
9194       - remove all disks from the old secondary
9195
9196     Failures are not very well handled.
9197
9198     """
9199     steps_total = 6
9200
9201     # Step: check device activation
9202     self.lu.LogStep(1, steps_total, "Check device existence")
9203     self._CheckDisksExistence([self.instance.primary_node])
9204     self._CheckVolumeGroup([self.instance.primary_node])
9205
9206     # Step: check other node consistency
9207     self.lu.LogStep(2, steps_total, "Check peer consistency")
9208     self._CheckDisksConsistency(self.instance.primary_node, True, True)
9209
9210     # Step: create new storage
9211     self.lu.LogStep(3, steps_total, "Allocate new storage")
9212     for idx, dev in enumerate(self.instance.disks):
9213       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9214                       (self.new_node, idx))
9215       # we pass force_create=True to force LVM creation
9216       for new_lv in dev.children:
9217         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9218                         _GetInstanceInfoText(self.instance), False)
9219
9220     # Step 4: dbrd minors and drbd setups changes
9221     # after this, we must manually remove the drbd minors on both the
9222     # error and the success paths
9223     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9224     minors = self.cfg.AllocateDRBDMinor([self.new_node
9225                                          for dev in self.instance.disks],
9226                                         self.instance.name)
9227     logging.debug("Allocated minors %r", minors)
9228
9229     iv_names = {}
9230     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9231       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9232                       (self.new_node, idx))
9233       # create new devices on new_node; note that we create two IDs:
9234       # one without port, so the drbd will be activated without
9235       # networking information on the new node at this stage, and one
9236       # with network, for the latter activation in step 4
9237       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9238       if self.instance.primary_node == o_node1:
9239         p_minor = o_minor1
9240       else:
9241         assert self.instance.primary_node == o_node2, "Three-node instance?"
9242         p_minor = o_minor2
9243
9244       new_alone_id = (self.instance.primary_node, self.new_node, None,
9245                       p_minor, new_minor, o_secret)
9246       new_net_id = (self.instance.primary_node, self.new_node, o_port,
9247                     p_minor, new_minor, o_secret)
9248
9249       iv_names[idx] = (dev, dev.children, new_net_id)
9250       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9251                     new_net_id)
9252       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9253                               logical_id=new_alone_id,
9254                               children=dev.children,
9255                               size=dev.size)
9256       try:
9257         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9258                               _GetInstanceInfoText(self.instance), False)
9259       except errors.GenericError:
9260         self.cfg.ReleaseDRBDMinors(self.instance.name)
9261         raise
9262
9263     # We have new devices, shutdown the drbd on the old secondary
9264     for idx, dev in enumerate(self.instance.disks):
9265       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9266       self.cfg.SetDiskID(dev, self.target_node)
9267       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9268       if msg:
9269         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9270                            "node: %s" % (idx, msg),
9271                            hint=("Please cleanup this device manually as"
9272                                  " soon as possible"))
9273
9274     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9275     result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
9276                                                self.node_secondary_ip,
9277                                                self.instance.disks)\
9278                                               [self.instance.primary_node]
9279
9280     msg = result.fail_msg
9281     if msg:
9282       # detaches didn't succeed (unlikely)
9283       self.cfg.ReleaseDRBDMinors(self.instance.name)
9284       raise errors.OpExecError("Can't detach the disks from the network on"
9285                                " old node: %s" % (msg,))
9286
9287     # if we managed to detach at least one, we update all the disks of
9288     # the instance to point to the new secondary
9289     self.lu.LogInfo("Updating instance configuration")
9290     for dev, _, new_logical_id in iv_names.itervalues():
9291       dev.logical_id = new_logical_id
9292       self.cfg.SetDiskID(dev, self.instance.primary_node)
9293
9294     self.cfg.Update(self.instance, feedback_fn)
9295
9296     # and now perform the drbd attach
9297     self.lu.LogInfo("Attaching primary drbds to new secondary"
9298                     " (standalone => connected)")
9299     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9300                                             self.new_node],
9301                                            self.node_secondary_ip,
9302                                            self.instance.disks,
9303                                            self.instance.name,
9304                                            False)
9305     for to_node, to_result in result.items():
9306       msg = to_result.fail_msg
9307       if msg:
9308         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9309                            to_node, msg,
9310                            hint=("please do a gnt-instance info to see the"
9311                                  " status of disks"))
9312     cstep = 5
9313     if self.early_release:
9314       self.lu.LogStep(cstep, steps_total, "Removing old storage")
9315       cstep += 1
9316       self._RemoveOldStorage(self.target_node, iv_names)
9317       # WARNING: we release all node locks here, do not do other RPCs
9318       # than WaitForSync to the primary node
9319       _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9320                     names=[self.instance.primary_node,
9321                            self.target_node,
9322                            self.new_node])
9323
9324     # Wait for sync
9325     # This can fail as the old devices are degraded and _WaitForSync
9326     # does a combined result over all disks, so we don't check its return value
9327     self.lu.LogStep(cstep, steps_total, "Sync devices")
9328     cstep += 1
9329     _WaitForSync(self.lu, self.instance)
9330
9331     # Check all devices manually
9332     self._CheckDevices(self.instance.primary_node, iv_names)
9333
9334     # Step: remove old storage
9335     if not self.early_release:
9336       self.lu.LogStep(cstep, steps_total, "Removing old storage")
9337       self._RemoveOldStorage(self.target_node, iv_names)
9338
9339
9340 class LURepairNodeStorage(NoHooksLU):
9341   """Repairs the volume group on a node.
9342
9343   """
9344   REQ_BGL = False
9345
9346   def CheckArguments(self):
9347     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9348
9349     storage_type = self.op.storage_type
9350
9351     if (constants.SO_FIX_CONSISTENCY not in
9352         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
9353       raise errors.OpPrereqError("Storage units of type '%s' can not be"
9354                                  " repaired" % storage_type,
9355                                  errors.ECODE_INVAL)
9356
9357   def ExpandNames(self):
9358     self.needed_locks = {
9359       locking.LEVEL_NODE: [self.op.node_name],
9360       }
9361
9362   def _CheckFaultyDisks(self, instance, node_name):
9363     """Ensure faulty disks abort the opcode or at least warn."""
9364     try:
9365       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
9366                                   node_name, True):
9367         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
9368                                    " node '%s'" % (instance.name, node_name),
9369                                    errors.ECODE_STATE)
9370     except errors.OpPrereqError, err:
9371       if self.op.ignore_consistency:
9372         self.proc.LogWarning(str(err.args[0]))
9373       else:
9374         raise
9375
9376   def CheckPrereq(self):
9377     """Check prerequisites.
9378
9379     """
9380     # Check whether any instance on this node has faulty disks
9381     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
9382       if not inst.admin_up:
9383         continue
9384       check_nodes = set(inst.all_nodes)
9385       check_nodes.discard(self.op.node_name)
9386       for inst_node_name in check_nodes:
9387         self._CheckFaultyDisks(inst, inst_node_name)
9388
9389   def Exec(self, feedback_fn):
9390     feedback_fn("Repairing storage unit '%s' on %s ..." %
9391                 (self.op.name, self.op.node_name))
9392
9393     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
9394     result = self.rpc.call_storage_execute(self.op.node_name,
9395                                            self.op.storage_type, st_args,
9396                                            self.op.name,
9397                                            constants.SO_FIX_CONSISTENCY)
9398     result.Raise("Failed to repair storage unit '%s' on %s" %
9399                  (self.op.name, self.op.node_name))
9400
9401
9402 class LUNodeEvacStrategy(NoHooksLU):
9403   """Computes the node evacuation strategy.
9404
9405   """
9406   REQ_BGL = False
9407
9408   def CheckArguments(self):
9409     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
9410
9411   def ExpandNames(self):
9412     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
9413     self.needed_locks = locks = {}
9414     if self.op.remote_node is None:
9415       locks[locking.LEVEL_NODE] = locking.ALL_SET
9416     else:
9417       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9418       locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
9419
9420   def Exec(self, feedback_fn):
9421     if self.op.remote_node is not None:
9422       instances = []
9423       for node in self.op.nodes:
9424         instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
9425       result = []
9426       for i in instances:
9427         if i.primary_node == self.op.remote_node:
9428           raise errors.OpPrereqError("Node %s is the primary node of"
9429                                      " instance %s, cannot use it as"
9430                                      " secondary" %
9431                                      (self.op.remote_node, i.name),
9432                                      errors.ECODE_INVAL)
9433         result.append([i.name, self.op.remote_node])
9434     else:
9435       ial = IAllocator(self.cfg, self.rpc,
9436                        mode=constants.IALLOCATOR_MODE_MEVAC,
9437                        evac_nodes=self.op.nodes)
9438       ial.Run(self.op.iallocator, validate=True)
9439       if not ial.success:
9440         raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
9441                                  errors.ECODE_NORES)
9442       result = ial.result
9443     return result
9444
9445
9446 class LUInstanceGrowDisk(LogicalUnit):
9447   """Grow a disk of an instance.
9448
9449   """
9450   HPATH = "disk-grow"
9451   HTYPE = constants.HTYPE_INSTANCE
9452   REQ_BGL = False
9453
9454   def ExpandNames(self):
9455     self._ExpandAndLockInstance()
9456     self.needed_locks[locking.LEVEL_NODE] = []
9457     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9458
9459   def DeclareLocks(self, level):
9460     if level == locking.LEVEL_NODE:
9461       self._LockInstancesNodes()
9462
9463   def BuildHooksEnv(self):
9464     """Build hooks env.
9465
9466     This runs on the master, the primary and all the secondaries.
9467
9468     """
9469     env = {
9470       "DISK": self.op.disk,
9471       "AMOUNT": self.op.amount,
9472       }
9473     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9474     return env
9475
9476   def BuildHooksNodes(self):
9477     """Build hooks nodes.
9478
9479     """
9480     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9481     return (nl, nl)
9482
9483   def CheckPrereq(self):
9484     """Check prerequisites.
9485
9486     This checks that the instance is in the cluster.
9487
9488     """
9489     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9490     assert instance is not None, \
9491       "Cannot retrieve locked instance %s" % self.op.instance_name
9492     nodenames = list(instance.all_nodes)
9493     for node in nodenames:
9494       _CheckNodeOnline(self, node)
9495
9496     self.instance = instance
9497
9498     if instance.disk_template not in constants.DTS_GROWABLE:
9499       raise errors.OpPrereqError("Instance's disk layout does not support"
9500                                  " growing", errors.ECODE_INVAL)
9501
9502     self.disk = instance.FindDisk(self.op.disk)
9503
9504     if instance.disk_template not in (constants.DT_FILE,
9505                                       constants.DT_SHARED_FILE):
9506       # TODO: check the free disk space for file, when that feature will be
9507       # supported
9508       _CheckNodesFreeDiskPerVG(self, nodenames,
9509                                self.disk.ComputeGrowth(self.op.amount))
9510
9511   def Exec(self, feedback_fn):
9512     """Execute disk grow.
9513
9514     """
9515     instance = self.instance
9516     disk = self.disk
9517
9518     disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
9519     if not disks_ok:
9520       raise errors.OpExecError("Cannot activate block device to grow")
9521
9522     for node in instance.all_nodes:
9523       self.cfg.SetDiskID(disk, node)
9524       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
9525       result.Raise("Grow request failed to node %s" % node)
9526
9527       # TODO: Rewrite code to work properly
9528       # DRBD goes into sync mode for a short amount of time after executing the
9529       # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
9530       # calling "resize" in sync mode fails. Sleeping for a short amount of
9531       # time is a work-around.
9532       time.sleep(5)
9533
9534     disk.RecordGrow(self.op.amount)
9535     self.cfg.Update(instance, feedback_fn)
9536     if self.op.wait_for_sync:
9537       disk_abort = not _WaitForSync(self, instance, disks=[disk])
9538       if disk_abort:
9539         self.proc.LogWarning("Disk sync-ing has not returned a good"
9540                              " status; please check the instance")
9541       if not instance.admin_up:
9542         _SafeShutdownInstanceDisks(self, instance, disks=[disk])
9543     elif not instance.admin_up:
9544       self.proc.LogWarning("Not shutting down the disk even if the instance is"
9545                            " not supposed to be running because no wait for"
9546                            " sync mode was requested")
9547
9548
9549 class LUInstanceQueryData(NoHooksLU):
9550   """Query runtime instance data.
9551
9552   """
9553   REQ_BGL = False
9554
9555   def ExpandNames(self):
9556     self.needed_locks = {}
9557
9558     # Use locking if requested or when non-static information is wanted
9559     if not (self.op.static or self.op.use_locking):
9560       self.LogWarning("Non-static data requested, locks need to be acquired")
9561       self.op.use_locking = True
9562
9563     if self.op.instances or not self.op.use_locking:
9564       # Expand instance names right here
9565       self.wanted_names = _GetWantedInstances(self, self.op.instances)
9566     else:
9567       # Will use acquired locks
9568       self.wanted_names = None
9569
9570     if self.op.use_locking:
9571       self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9572
9573       if self.wanted_names is None:
9574         self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
9575       else:
9576         self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
9577
9578       self.needed_locks[locking.LEVEL_NODE] = []
9579       self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9580       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9581
9582   def DeclareLocks(self, level):
9583     if self.op.use_locking and level == locking.LEVEL_NODE:
9584       self._LockInstancesNodes()
9585
9586   def CheckPrereq(self):
9587     """Check prerequisites.
9588
9589     This only checks the optional instance list against the existing names.
9590
9591     """
9592     if self.wanted_names is None:
9593       assert self.op.use_locking, "Locking was not used"
9594       self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)
9595
9596     self.wanted_instances = [self.cfg.GetInstanceInfo(name)
9597                              for name in self.wanted_names]
9598
9599   def _ComputeBlockdevStatus(self, node, instance_name, dev):
9600     """Returns the status of a block device
9601
9602     """
9603     if self.op.static or not node:
9604       return None
9605
9606     self.cfg.SetDiskID(dev, node)
9607
9608     result = self.rpc.call_blockdev_find(node, dev)
9609     if result.offline:
9610       return None
9611
9612     result.Raise("Can't compute disk status for %s" % instance_name)
9613
9614     status = result.payload
9615     if status is None:
9616       return None
9617
9618     return (status.dev_path, status.major, status.minor,
9619             status.sync_percent, status.estimated_time,
9620             status.is_degraded, status.ldisk_status)
9621
9622   def _ComputeDiskStatus(self, instance, snode, dev):
9623     """Compute block device status.
9624
9625     """
9626     if dev.dev_type in constants.LDS_DRBD:
9627       # we change the snode then (otherwise we use the one passed in)
9628       if dev.logical_id[0] == instance.primary_node:
9629         snode = dev.logical_id[1]
9630       else:
9631         snode = dev.logical_id[0]
9632
9633     dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
9634                                               instance.name, dev)
9635     dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
9636
9637     if dev.children:
9638       dev_children = [self._ComputeDiskStatus(instance, snode, child)
9639                       for child in dev.children]
9640     else:
9641       dev_children = []
9642
9643     return {
9644       "iv_name": dev.iv_name,
9645       "dev_type": dev.dev_type,
9646       "logical_id": dev.logical_id,
9647       "physical_id": dev.physical_id,
9648       "pstatus": dev_pstatus,
9649       "sstatus": dev_sstatus,
9650       "children": dev_children,
9651       "mode": dev.mode,
9652       "size": dev.size,
9653       }
9654
9655   def Exec(self, feedback_fn):
9656     """Gather and return data"""
9657     result = {}
9658
9659     cluster = self.cfg.GetClusterInfo()
9660
9661     for instance in self.wanted_instances:
9662       if not self.op.static:
9663         remote_info = self.rpc.call_instance_info(instance.primary_node,
9664                                                   instance.name,
9665                                                   instance.hypervisor)
9666         remote_info.Raise("Error checking node %s" % instance.primary_node)
9667         remote_info = remote_info.payload
9668         if remote_info and "state" in remote_info:
9669           remote_state = "up"
9670         else:
9671           remote_state = "down"
9672       else:
9673         remote_state = None
9674       if instance.admin_up:
9675         config_state = "up"
9676       else:
9677         config_state = "down"
9678
9679       disks = [self._ComputeDiskStatus(instance, None, device)
9680                for device in instance.disks]
9681
9682       result[instance.name] = {
9683         "name": instance.name,
9684         "config_state": config_state,
9685         "run_state": remote_state,
9686         "pnode": instance.primary_node,
9687         "snodes": instance.secondary_nodes,
9688         "os": instance.os,
9689         # this happens to be the same format used for hooks
9690         "nics": _NICListToTuple(self, instance.nics),
9691         "disk_template": instance.disk_template,
9692         "disks": disks,
9693         "hypervisor": instance.hypervisor,
9694         "network_port": instance.network_port,
9695         "hv_instance": instance.hvparams,
9696         "hv_actual": cluster.FillHV(instance, skip_globals=True),
9697         "be_instance": instance.beparams,
9698         "be_actual": cluster.FillBE(instance),
9699         "os_instance": instance.osparams,
9700         "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
9701         "serial_no": instance.serial_no,
9702         "mtime": instance.mtime,
9703         "ctime": instance.ctime,
9704         "uuid": instance.uuid,
9705         }
9706
9707     return result
9708
9709
9710 class LUInstanceSetParams(LogicalUnit):
9711   """Modifies an instances's parameters.
9712
9713   """
9714   HPATH = "instance-modify"
9715   HTYPE = constants.HTYPE_INSTANCE
9716   REQ_BGL = False
9717
9718   def CheckArguments(self):
9719     if not (self.op.nics or self.op.disks or self.op.disk_template or
9720             self.op.hvparams or self.op.beparams or self.op.os_name):
9721       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
9722
9723     if self.op.hvparams:
9724       _CheckGlobalHvParams(self.op.hvparams)
9725
9726     # Disk validation
9727     disk_addremove = 0
9728     for disk_op, disk_dict in self.op.disks:
9729       utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
9730       if disk_op == constants.DDM_REMOVE:
9731         disk_addremove += 1
9732         continue
9733       elif disk_op == constants.DDM_ADD:
9734         disk_addremove += 1
9735       else:
9736         if not isinstance(disk_op, int):
9737           raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
9738         if not isinstance(disk_dict, dict):
9739           msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
9740           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9741
9742       if disk_op == constants.DDM_ADD:
9743         mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
9744         if mode not in constants.DISK_ACCESS_SET:
9745           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9746                                      errors.ECODE_INVAL)
9747         size = disk_dict.get(constants.IDISK_SIZE, None)
9748         if size is None:
9749           raise errors.OpPrereqError("Required disk parameter size missing",
9750                                      errors.ECODE_INVAL)
9751         try:
9752           size = int(size)
9753         except (TypeError, ValueError), err:
9754           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
9755                                      str(err), errors.ECODE_INVAL)
9756         disk_dict[constants.IDISK_SIZE] = size
9757       else:
9758         # modification of disk
9759         if constants.IDISK_SIZE in disk_dict:
9760           raise errors.OpPrereqError("Disk size change not possible, use"
9761                                      " grow-disk", errors.ECODE_INVAL)
9762
9763     if disk_addremove > 1:
9764       raise errors.OpPrereqError("Only one disk add or remove operation"
9765                                  " supported at a time", errors.ECODE_INVAL)
9766
9767     if self.op.disks and self.op.disk_template is not None:
9768       raise errors.OpPrereqError("Disk template conversion and other disk"
9769                                  " changes not supported at the same time",
9770                                  errors.ECODE_INVAL)
9771
9772     if (self.op.disk_template and
9773         self.op.disk_template in constants.DTS_INT_MIRROR and
9774         self.op.remote_node is None):
9775       raise errors.OpPrereqError("Changing the disk template to a mirrored"
9776                                  " one requires specifying a secondary node",
9777                                  errors.ECODE_INVAL)
9778
9779     # NIC validation
9780     nic_addremove = 0
9781     for nic_op, nic_dict in self.op.nics:
9782       utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9783       if nic_op == constants.DDM_REMOVE:
9784         nic_addremove += 1
9785         continue
9786       elif nic_op == constants.DDM_ADD:
9787         nic_addremove += 1
9788       else:
9789         if not isinstance(nic_op, int):
9790           raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9791         if not isinstance(nic_dict, dict):
9792           msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9793           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9794
9795       # nic_dict should be a dict
9796       nic_ip = nic_dict.get(constants.INIC_IP, None)
9797       if nic_ip is not None:
9798         if nic_ip.lower() == constants.VALUE_NONE:
9799           nic_dict[constants.INIC_IP] = None
9800         else:
9801           if not netutils.IPAddress.IsValid(nic_ip):
9802             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9803                                        errors.ECODE_INVAL)
9804
9805       nic_bridge = nic_dict.get('bridge', None)
9806       nic_link = nic_dict.get(constants.INIC_LINK, None)
9807       if nic_bridge and nic_link:
9808         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9809                                    " at the same time", errors.ECODE_INVAL)
9810       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9811         nic_dict['bridge'] = None
9812       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9813         nic_dict[constants.INIC_LINK] = None
9814
9815       if nic_op == constants.DDM_ADD:
9816         nic_mac = nic_dict.get(constants.INIC_MAC, None)
9817         if nic_mac is None:
9818           nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
9819
9820       if constants.INIC_MAC in nic_dict:
9821         nic_mac = nic_dict[constants.INIC_MAC]
9822         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9823           nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9824
9825         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9826           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9827                                      " modifying an existing nic",
9828                                      errors.ECODE_INVAL)
9829
9830     if nic_addremove > 1:
9831       raise errors.OpPrereqError("Only one NIC add or remove operation"
9832                                  " supported at a time", errors.ECODE_INVAL)
9833
9834   def ExpandNames(self):
9835     self._ExpandAndLockInstance()
9836     self.needed_locks[locking.LEVEL_NODE] = []
9837     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9838
9839   def DeclareLocks(self, level):
9840     if level == locking.LEVEL_NODE:
9841       self._LockInstancesNodes()
9842       if self.op.disk_template and self.op.remote_node:
9843         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9844         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9845
9846   def BuildHooksEnv(self):
9847     """Build hooks env.
9848
9849     This runs on the master, primary and secondaries.
9850
9851     """
9852     args = dict()
9853     if constants.BE_MEMORY in self.be_new:
9854       args['memory'] = self.be_new[constants.BE_MEMORY]
9855     if constants.BE_VCPUS in self.be_new:
9856       args['vcpus'] = self.be_new[constants.BE_VCPUS]
9857     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9858     # information at all.
9859     if self.op.nics:
9860       args['nics'] = []
9861       nic_override = dict(self.op.nics)
9862       for idx, nic in enumerate(self.instance.nics):
9863         if idx in nic_override:
9864           this_nic_override = nic_override[idx]
9865         else:
9866           this_nic_override = {}
9867         if constants.INIC_IP in this_nic_override:
9868           ip = this_nic_override[constants.INIC_IP]
9869         else:
9870           ip = nic.ip
9871         if constants.INIC_MAC in this_nic_override:
9872           mac = this_nic_override[constants.INIC_MAC]
9873         else:
9874           mac = nic.mac
9875         if idx in self.nic_pnew:
9876           nicparams = self.nic_pnew[idx]
9877         else:
9878           nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9879         mode = nicparams[constants.NIC_MODE]
9880         link = nicparams[constants.NIC_LINK]
9881         args['nics'].append((ip, mac, mode, link))
9882       if constants.DDM_ADD in nic_override:
9883         ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
9884         mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
9885         nicparams = self.nic_pnew[constants.DDM_ADD]
9886         mode = nicparams[constants.NIC_MODE]
9887         link = nicparams[constants.NIC_LINK]
9888         args['nics'].append((ip, mac, mode, link))
9889       elif constants.DDM_REMOVE in nic_override:
9890         del args['nics'][-1]
9891
9892     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9893     if self.op.disk_template:
9894       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9895
9896     return env
9897
9898   def BuildHooksNodes(self):
9899     """Build hooks nodes.
9900
9901     """
9902     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9903     return (nl, nl)
9904
9905   def CheckPrereq(self):
9906     """Check prerequisites.
9907
9908     This only checks the instance list against the existing names.
9909
9910     """
9911     # checking the new params on the primary/secondary nodes
9912
9913     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9914     cluster = self.cluster = self.cfg.GetClusterInfo()
9915     assert self.instance is not None, \
9916       "Cannot retrieve locked instance %s" % self.op.instance_name
9917     pnode = instance.primary_node
9918     nodelist = list(instance.all_nodes)
9919
9920     # OS change
9921     if self.op.os_name and not self.op.force:
9922       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9923                       self.op.force_variant)
9924       instance_os = self.op.os_name
9925     else:
9926       instance_os = instance.os
9927
9928     if self.op.disk_template:
9929       if instance.disk_template == self.op.disk_template:
9930         raise errors.OpPrereqError("Instance already has disk template %s" %
9931                                    instance.disk_template, errors.ECODE_INVAL)
9932
9933       if (instance.disk_template,
9934           self.op.disk_template) not in self._DISK_CONVERSIONS:
9935         raise errors.OpPrereqError("Unsupported disk template conversion from"
9936                                    " %s to %s" % (instance.disk_template,
9937                                                   self.op.disk_template),
9938                                    errors.ECODE_INVAL)
9939       _CheckInstanceDown(self, instance, "cannot change disk template")
9940       if self.op.disk_template in constants.DTS_INT_MIRROR:
9941         if self.op.remote_node == pnode:
9942           raise errors.OpPrereqError("Given new secondary node %s is the same"
9943                                      " as the primary node of the instance" %
9944                                      self.op.remote_node, errors.ECODE_STATE)
9945         _CheckNodeOnline(self, self.op.remote_node)
9946         _CheckNodeNotDrained(self, self.op.remote_node)
9947         # FIXME: here we assume that the old instance type is DT_PLAIN
9948         assert instance.disk_template == constants.DT_PLAIN
9949         disks = [{constants.IDISK_SIZE: d.size,
9950                   constants.IDISK_VG: d.logical_id[0]}
9951                  for d in instance.disks]
9952         required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9953         _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9954
9955     # hvparams processing
9956     if self.op.hvparams:
9957       hv_type = instance.hypervisor
9958       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9959       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9960       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9961
9962       # local check
9963       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9964       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9965       self.hv_new = hv_new # the new actual values
9966       self.hv_inst = i_hvdict # the new dict (without defaults)
9967     else:
9968       self.hv_new = self.hv_inst = {}
9969
9970     # beparams processing
9971     if self.op.beparams:
9972       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9973                                    use_none=True)
9974       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9975       be_new = cluster.SimpleFillBE(i_bedict)
9976       self.be_new = be_new # the new actual values
9977       self.be_inst = i_bedict # the new dict (without defaults)
9978     else:
9979       self.be_new = self.be_inst = {}
9980
9981     # osparams processing
9982     if self.op.osparams:
9983       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9984       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9985       self.os_inst = i_osdict # the new dict (without defaults)
9986     else:
9987       self.os_inst = {}
9988
9989     self.warn = []
9990
9991     if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9992       mem_check_list = [pnode]
9993       if be_new[constants.BE_AUTO_BALANCE]:
9994         # either we changed auto_balance to yes or it was from before
9995         mem_check_list.extend(instance.secondary_nodes)
9996       instance_info = self.rpc.call_instance_info(pnode, instance.name,
9997                                                   instance.hypervisor)
9998       nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9999                                          instance.hypervisor)
10000       pninfo = nodeinfo[pnode]
10001       msg = pninfo.fail_msg
10002       if msg:
10003         # Assume the primary node is unreachable and go ahead
10004         self.warn.append("Can't get info from primary node %s: %s" %
10005                          (pnode,  msg))
10006       elif not isinstance(pninfo.payload.get('memory_free', None), int):
10007         self.warn.append("Node data from primary node %s doesn't contain"
10008                          " free memory information" % pnode)
10009       elif instance_info.fail_msg:
10010         self.warn.append("Can't get instance runtime information: %s" %
10011                         instance_info.fail_msg)
10012       else:
10013         if instance_info.payload:
10014           current_mem = int(instance_info.payload['memory'])
10015         else:
10016           # Assume instance not running
10017           # (there is a slight race condition here, but it's not very probable,
10018           # and we have no other way to check)
10019           current_mem = 0
10020         miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10021                     pninfo.payload['memory_free'])
10022         if miss_mem > 0:
10023           raise errors.OpPrereqError("This change will prevent the instance"
10024                                      " from starting, due to %d MB of memory"
10025                                      " missing on its primary node" % miss_mem,
10026                                      errors.ECODE_NORES)
10027
10028       if be_new[constants.BE_AUTO_BALANCE]:
10029         for node, nres in nodeinfo.items():
10030           if node not in instance.secondary_nodes:
10031             continue
10032           msg = nres.fail_msg
10033           if msg:
10034             self.warn.append("Can't get info from secondary node %s: %s" %
10035                              (node, msg))
10036           elif not isinstance(nres.payload.get('memory_free', None), int):
10037             self.warn.append("Secondary node %s didn't return free"
10038                              " memory information" % node)
10039           elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
10040             self.warn.append("Not enough memory to failover instance to"
10041                              " secondary node %s" % node)
10042
10043     # NIC processing
10044     self.nic_pnew = {}
10045     self.nic_pinst = {}
10046     for nic_op, nic_dict in self.op.nics:
10047       if nic_op == constants.DDM_REMOVE:
10048         if not instance.nics:
10049           raise errors.OpPrereqError("Instance has no NICs, cannot remove",
10050                                      errors.ECODE_INVAL)
10051         continue
10052       if nic_op != constants.DDM_ADD:
10053         # an existing nic
10054         if not instance.nics:
10055           raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10056                                      " no NICs" % nic_op,
10057                                      errors.ECODE_INVAL)
10058         if nic_op < 0 or nic_op >= len(instance.nics):
10059           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
10060                                      " are 0 to %d" %
10061                                      (nic_op, len(instance.nics) - 1),
10062                                      errors.ECODE_INVAL)
10063         old_nic_params = instance.nics[nic_op].nicparams
10064         old_nic_ip = instance.nics[nic_op].ip
10065       else:
10066         old_nic_params = {}
10067         old_nic_ip = None
10068
10069       update_params_dict = dict([(key, nic_dict[key])
10070                                  for key in constants.NICS_PARAMETERS
10071                                  if key in nic_dict])
10072
10073       if 'bridge' in nic_dict:
10074         update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
10075
10076       new_nic_params = _GetUpdatedParams(old_nic_params,
10077                                          update_params_dict)
10078       utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
10079       new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
10080       objects.NIC.CheckParameterSyntax(new_filled_nic_params)
10081       self.nic_pinst[nic_op] = new_nic_params
10082       self.nic_pnew[nic_op] = new_filled_nic_params
10083       new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
10084
10085       if new_nic_mode == constants.NIC_MODE_BRIDGED:
10086         nic_bridge = new_filled_nic_params[constants.NIC_LINK]
10087         msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
10088         if msg:
10089           msg = "Error checking bridges on node %s: %s" % (pnode, msg)
10090           if self.op.force:
10091             self.warn.append(msg)
10092           else:
10093             raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
10094       if new_nic_mode == constants.NIC_MODE_ROUTED:
10095         if constants.INIC_IP in nic_dict:
10096           nic_ip = nic_dict[constants.INIC_IP]
10097         else:
10098           nic_ip = old_nic_ip
10099         if nic_ip is None:
10100           raise errors.OpPrereqError('Cannot set the nic ip to None'
10101                                      ' on a routed nic', errors.ECODE_INVAL)
10102       if constants.INIC_MAC in nic_dict:
10103         nic_mac = nic_dict[constants.INIC_MAC]
10104         if nic_mac is None:
10105           raise errors.OpPrereqError('Cannot set the nic mac to None',
10106                                      errors.ECODE_INVAL)
10107         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10108           # otherwise generate the mac
10109           nic_dict[constants.INIC_MAC] = \
10110             self.cfg.GenerateMAC(self.proc.GetECId())
10111         else:
10112           # or validate/reserve the current one
10113           try:
10114             self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
10115           except errors.ReservationError:
10116             raise errors.OpPrereqError("MAC address %s already in use"
10117                                        " in cluster" % nic_mac,
10118                                        errors.ECODE_NOTUNIQUE)
10119
10120     # DISK processing
10121     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
10122       raise errors.OpPrereqError("Disk operations not supported for"
10123                                  " diskless instances",
10124                                  errors.ECODE_INVAL)
10125     for disk_op, _ in self.op.disks:
10126       if disk_op == constants.DDM_REMOVE:
10127         if len(instance.disks) == 1:
10128           raise errors.OpPrereqError("Cannot remove the last disk of"
10129                                      " an instance", errors.ECODE_INVAL)
10130         _CheckInstanceDown(self, instance, "cannot remove disks")
10131
10132       if (disk_op == constants.DDM_ADD and
10133           len(instance.disks) >= constants.MAX_DISKS):
10134         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
10135                                    " add more" % constants.MAX_DISKS,
10136                                    errors.ECODE_STATE)
10137       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
10138         # an existing disk
10139         if disk_op < 0 or disk_op >= len(instance.disks):
10140           raise errors.OpPrereqError("Invalid disk index %s, valid values"
10141                                      " are 0 to %d" %
10142                                      (disk_op, len(instance.disks)),
10143                                      errors.ECODE_INVAL)
10144
10145     return
10146
10147   def _ConvertPlainToDrbd(self, feedback_fn):
10148     """Converts an instance from plain to drbd.
10149
10150     """
10151     feedback_fn("Converting template to drbd")
10152     instance = self.instance
10153     pnode = instance.primary_node
10154     snode = self.op.remote_node
10155
10156     # create a fake disk info for _GenerateDiskTemplate
10157     disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
10158                   constants.IDISK_VG: d.logical_id[0]}
10159                  for d in instance.disks]
10160     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
10161                                       instance.name, pnode, [snode],
10162                                       disk_info, None, None, 0, feedback_fn)
10163     info = _GetInstanceInfoText(instance)
10164     feedback_fn("Creating aditional volumes...")
10165     # first, create the missing data and meta devices
10166     for disk in new_disks:
10167       # unfortunately this is... not too nice
10168       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
10169                             info, True)
10170       for child in disk.children:
10171         _CreateSingleBlockDev(self, snode, instance, child, info, True)
10172     # at this stage, all new LVs have been created, we can rename the
10173     # old ones
10174     feedback_fn("Renaming original volumes...")
10175     rename_list = [(o, n.children[0].logical_id)
10176                    for (o, n) in zip(instance.disks, new_disks)]
10177     result = self.rpc.call_blockdev_rename(pnode, rename_list)
10178     result.Raise("Failed to rename original LVs")
10179
10180     feedback_fn("Initializing DRBD devices...")
10181     # all child devices are in place, we can now create the DRBD devices
10182     for disk in new_disks:
10183       for node in [pnode, snode]:
10184         f_create = node == pnode
10185         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
10186
10187     # at this point, the instance has been modified
10188     instance.disk_template = constants.DT_DRBD8
10189     instance.disks = new_disks
10190     self.cfg.Update(instance, feedback_fn)
10191
10192     # disks are created, waiting for sync
10193     disk_abort = not _WaitForSync(self, instance)
10194     if disk_abort:
10195       raise errors.OpExecError("There are some degraded disks for"
10196                                " this instance, please cleanup manually")
10197
10198   def _ConvertDrbdToPlain(self, feedback_fn):
10199     """Converts an instance from drbd to plain.
10200
10201     """
10202     instance = self.instance
10203     assert len(instance.secondary_nodes) == 1
10204     pnode = instance.primary_node
10205     snode = instance.secondary_nodes[0]
10206     feedback_fn("Converting template to plain")
10207
10208     old_disks = instance.disks
10209     new_disks = [d.children[0] for d in old_disks]
10210
10211     # copy over size and mode
10212     for parent, child in zip(old_disks, new_disks):
10213       child.size = parent.size
10214       child.mode = parent.mode
10215
10216     # update instance structure
10217     instance.disks = new_disks
10218     instance.disk_template = constants.DT_PLAIN
10219     self.cfg.Update(instance, feedback_fn)
10220
10221     feedback_fn("Removing volumes on the secondary node...")
10222     for disk in old_disks:
10223       self.cfg.SetDiskID(disk, snode)
10224       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
10225       if msg:
10226         self.LogWarning("Could not remove block device %s on node %s,"
10227                         " continuing anyway: %s", disk.iv_name, snode, msg)
10228
10229     feedback_fn("Removing unneeded volumes on the primary node...")
10230     for idx, disk in enumerate(old_disks):
10231       meta = disk.children[1]
10232       self.cfg.SetDiskID(meta, pnode)
10233       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
10234       if msg:
10235         self.LogWarning("Could not remove metadata for disk %d on node %s,"
10236                         " continuing anyway: %s", idx, pnode, msg)
10237
10238   def Exec(self, feedback_fn):
10239     """Modifies an instance.
10240
10241     All parameters take effect only at the next restart of the instance.
10242
10243     """
10244     # Process here the warnings from CheckPrereq, as we don't have a
10245     # feedback_fn there.
10246     for warn in self.warn:
10247       feedback_fn("WARNING: %s" % warn)
10248
10249     result = []
10250     instance = self.instance
10251     # disk changes
10252     for disk_op, disk_dict in self.op.disks:
10253       if disk_op == constants.DDM_REMOVE:
10254         # remove the last disk
10255         device = instance.disks.pop()
10256         device_idx = len(instance.disks)
10257         for node, disk in device.ComputeNodeTree(instance.primary_node):
10258           self.cfg.SetDiskID(disk, node)
10259           msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
10260           if msg:
10261             self.LogWarning("Could not remove disk/%d on node %s: %s,"
10262                             " continuing anyway", device_idx, node, msg)
10263         result.append(("disk/%d" % device_idx, "remove"))
10264       elif disk_op == constants.DDM_ADD:
10265         # add a new disk
10266         if instance.disk_template in (constants.DT_FILE,
10267                                         constants.DT_SHARED_FILE):
10268           file_driver, file_path = instance.disks[0].logical_id
10269           file_path = os.path.dirname(file_path)
10270         else:
10271           file_driver = file_path = None
10272         disk_idx_base = len(instance.disks)
10273         new_disk = _GenerateDiskTemplate(self,
10274                                          instance.disk_template,
10275                                          instance.name, instance.primary_node,
10276                                          instance.secondary_nodes,
10277                                          [disk_dict],
10278                                          file_path,
10279                                          file_driver,
10280                                          disk_idx_base, feedback_fn)[0]
10281         instance.disks.append(new_disk)
10282         info = _GetInstanceInfoText(instance)
10283
10284         logging.info("Creating volume %s for instance %s",
10285                      new_disk.iv_name, instance.name)
10286         # Note: this needs to be kept in sync with _CreateDisks
10287         #HARDCODE
10288         for node in instance.all_nodes:
10289           f_create = node == instance.primary_node
10290           try:
10291             _CreateBlockDev(self, node, instance, new_disk,
10292                             f_create, info, f_create)
10293           except errors.OpExecError, err:
10294             self.LogWarning("Failed to create volume %s (%s) on"
10295                             " node %s: %s",
10296                             new_disk.iv_name, new_disk, node, err)
10297         result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
10298                        (new_disk.size, new_disk.mode)))
10299       else:
10300         # change a given disk
10301         instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
10302         result.append(("disk.mode/%d" % disk_op,
10303                        disk_dict[constants.IDISK_MODE]))
10304
10305     if self.op.disk_template:
10306       r_shut = _ShutdownInstanceDisks(self, instance)
10307       if not r_shut:
10308         raise errors.OpExecError("Cannot shutdown instance disks, unable to"
10309                                  " proceed with disk template conversion")
10310       mode = (instance.disk_template, self.op.disk_template)
10311       try:
10312         self._DISK_CONVERSIONS[mode](self, feedback_fn)
10313       except:
10314         self.cfg.ReleaseDRBDMinors(instance.name)
10315         raise
10316       result.append(("disk_template", self.op.disk_template))
10317
10318     # NIC changes
10319     for nic_op, nic_dict in self.op.nics:
10320       if nic_op == constants.DDM_REMOVE:
10321         # remove the last nic
10322         del instance.nics[-1]
10323         result.append(("nic.%d" % len(instance.nics), "remove"))
10324       elif nic_op == constants.DDM_ADD:
10325         # mac and bridge should be set, by now
10326         mac = nic_dict[constants.INIC_MAC]
10327         ip = nic_dict.get(constants.INIC_IP, None)
10328         nicparams = self.nic_pinst[constants.DDM_ADD]
10329         new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
10330         instance.nics.append(new_nic)
10331         result.append(("nic.%d" % (len(instance.nics) - 1),
10332                        "add:mac=%s,ip=%s,mode=%s,link=%s" %
10333                        (new_nic.mac, new_nic.ip,
10334                         self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
10335                         self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
10336                        )))
10337       else:
10338         for key in (constants.INIC_MAC, constants.INIC_IP):
10339           if key in nic_dict:
10340             setattr(instance.nics[nic_op], key, nic_dict[key])
10341         if nic_op in self.nic_pinst:
10342           instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
10343         for key, val in nic_dict.iteritems():
10344           result.append(("nic.%s/%d" % (key, nic_op), val))
10345
10346     # hvparams changes
10347     if self.op.hvparams:
10348       instance.hvparams = self.hv_inst
10349       for key, val in self.op.hvparams.iteritems():
10350         result.append(("hv/%s" % key, val))
10351
10352     # beparams changes
10353     if self.op.beparams:
10354       instance.beparams = self.be_inst
10355       for key, val in self.op.beparams.iteritems():
10356         result.append(("be/%s" % key, val))
10357
10358     # OS change
10359     if self.op.os_name:
10360       instance.os = self.op.os_name
10361
10362     # osparams changes
10363     if self.op.osparams:
10364       instance.osparams = self.os_inst
10365       for key, val in self.op.osparams.iteritems():
10366         result.append(("os/%s" % key, val))
10367
10368     self.cfg.Update(instance, feedback_fn)
10369
10370     return result
10371
10372   _DISK_CONVERSIONS = {
10373     (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
10374     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
10375     }
10376
10377
10378 class LUBackupQuery(NoHooksLU):
10379   """Query the exports list
10380
10381   """
10382   REQ_BGL = False
10383
10384   def ExpandNames(self):
10385     self.needed_locks = {}
10386     self.share_locks[locking.LEVEL_NODE] = 1
10387     if not self.op.nodes:
10388       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10389     else:
10390       self.needed_locks[locking.LEVEL_NODE] = \
10391         _GetWantedNodes(self, self.op.nodes)
10392
10393   def Exec(self, feedback_fn):
10394     """Compute the list of all the exported system images.
10395
10396     @rtype: dict
10397     @return: a dictionary with the structure node->(export-list)
10398         where export-list is a list of the instances exported on
10399         that node.
10400
10401     """
10402     self.nodes = self.glm.list_owned(locking.LEVEL_NODE)
10403     rpcresult = self.rpc.call_export_list(self.nodes)
10404     result = {}
10405     for node in rpcresult:
10406       if rpcresult[node].fail_msg:
10407         result[node] = False
10408       else:
10409         result[node] = rpcresult[node].payload
10410
10411     return result
10412
10413
10414 class LUBackupPrepare(NoHooksLU):
10415   """Prepares an instance for an export and returns useful information.
10416
10417   """
10418   REQ_BGL = False
10419
10420   def ExpandNames(self):
10421     self._ExpandAndLockInstance()
10422
10423   def CheckPrereq(self):
10424     """Check prerequisites.
10425
10426     """
10427     instance_name = self.op.instance_name
10428
10429     self.instance = self.cfg.GetInstanceInfo(instance_name)
10430     assert self.instance is not None, \
10431           "Cannot retrieve locked instance %s" % self.op.instance_name
10432     _CheckNodeOnline(self, self.instance.primary_node)
10433
10434     self._cds = _GetClusterDomainSecret()
10435
10436   def Exec(self, feedback_fn):
10437     """Prepares an instance for an export.
10438
10439     """
10440     instance = self.instance
10441
10442     if self.op.mode == constants.EXPORT_MODE_REMOTE:
10443       salt = utils.GenerateSecret(8)
10444
10445       feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
10446       result = self.rpc.call_x509_cert_create(instance.primary_node,
10447                                               constants.RIE_CERT_VALIDITY)
10448       result.Raise("Can't create X509 key and certificate on %s" % result.node)
10449
10450       (name, cert_pem) = result.payload
10451
10452       cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
10453                                              cert_pem)
10454
10455       return {
10456         "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
10457         "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
10458                           salt),
10459         "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
10460         }
10461
10462     return None
10463
10464
10465 class LUBackupExport(LogicalUnit):
10466   """Export an instance to an image in the cluster.
10467
10468   """
10469   HPATH = "instance-export"
10470   HTYPE = constants.HTYPE_INSTANCE
10471   REQ_BGL = False
10472
10473   def CheckArguments(self):
10474     """Check the arguments.
10475
10476     """
10477     self.x509_key_name = self.op.x509_key_name
10478     self.dest_x509_ca_pem = self.op.destination_x509_ca
10479
10480     if self.op.mode == constants.EXPORT_MODE_REMOTE:
10481       if not self.x509_key_name:
10482         raise errors.OpPrereqError("Missing X509 key name for encryption",
10483                                    errors.ECODE_INVAL)
10484
10485       if not self.dest_x509_ca_pem:
10486         raise errors.OpPrereqError("Missing destination X509 CA",
10487                                    errors.ECODE_INVAL)
10488
10489   def ExpandNames(self):
10490     self._ExpandAndLockInstance()
10491
10492     # Lock all nodes for local exports
10493     if self.op.mode == constants.EXPORT_MODE_LOCAL:
10494       # FIXME: lock only instance primary and destination node
10495       #
10496       # Sad but true, for now we have do lock all nodes, as we don't know where
10497       # the previous export might be, and in this LU we search for it and
10498       # remove it from its current node. In the future we could fix this by:
10499       #  - making a tasklet to search (share-lock all), then create the
10500       #    new one, then one to remove, after
10501       #  - removing the removal operation altogether
10502       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10503
10504   def DeclareLocks(self, level):
10505     """Last minute lock declaration."""
10506     # All nodes are locked anyway, so nothing to do here.
10507
10508   def BuildHooksEnv(self):
10509     """Build hooks env.
10510
10511     This will run on the master, primary node and target node.
10512
10513     """
10514     env = {
10515       "EXPORT_MODE": self.op.mode,
10516       "EXPORT_NODE": self.op.target_node,
10517       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
10518       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
10519       # TODO: Generic function for boolean env variables
10520       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
10521       }
10522
10523     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10524
10525     return env
10526
10527   def BuildHooksNodes(self):
10528     """Build hooks nodes.
10529
10530     """
10531     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
10532
10533     if self.op.mode == constants.EXPORT_MODE_LOCAL:
10534       nl.append(self.op.target_node)
10535
10536     return (nl, nl)
10537
10538   def CheckPrereq(self):
10539     """Check prerequisites.
10540
10541     This checks that the instance and node names are valid.
10542
10543     """
10544     instance_name = self.op.instance_name
10545
10546     self.instance = self.cfg.GetInstanceInfo(instance_name)
10547     assert self.instance is not None, \
10548           "Cannot retrieve locked instance %s" % self.op.instance_name
10549     _CheckNodeOnline(self, self.instance.primary_node)
10550
10551     if (self.op.remove_instance and self.instance.admin_up and
10552         not self.op.shutdown):
10553       raise errors.OpPrereqError("Can not remove instance without shutting it"
10554                                  " down before")
10555
10556     if self.op.mode == constants.EXPORT_MODE_LOCAL:
10557       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
10558       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
10559       assert self.dst_node is not None
10560
10561       _CheckNodeOnline(self, self.dst_node.name)
10562       _CheckNodeNotDrained(self, self.dst_node.name)
10563
10564       self._cds = None
10565       self.dest_disk_info = None
10566       self.dest_x509_ca = None
10567
10568     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10569       self.dst_node = None
10570
10571       if len(self.op.target_node) != len(self.instance.disks):
10572         raise errors.OpPrereqError(("Received destination information for %s"
10573                                     " disks, but instance %s has %s disks") %
10574                                    (len(self.op.target_node), instance_name,
10575                                     len(self.instance.disks)),
10576                                    errors.ECODE_INVAL)
10577
10578       cds = _GetClusterDomainSecret()
10579
10580       # Check X509 key name
10581       try:
10582         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
10583       except (TypeError, ValueError), err:
10584         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
10585
10586       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
10587         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
10588                                    errors.ECODE_INVAL)
10589
10590       # Load and verify CA
10591       try:
10592         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
10593       except OpenSSL.crypto.Error, err:
10594         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
10595                                    (err, ), errors.ECODE_INVAL)
10596
10597       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10598       if errcode is not None:
10599         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
10600                                    (msg, ), errors.ECODE_INVAL)
10601
10602       self.dest_x509_ca = cert
10603
10604       # Verify target information
10605       disk_info = []
10606       for idx, disk_data in enumerate(self.op.target_node):
10607         try:
10608           (host, port, magic) = \
10609             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
10610         except errors.GenericError, err:
10611           raise errors.OpPrereqError("Target info for disk %s: %s" %
10612                                      (idx, err), errors.ECODE_INVAL)
10613
10614         disk_info.append((host, port, magic))
10615
10616       assert len(disk_info) == len(self.op.target_node)
10617       self.dest_disk_info = disk_info
10618
10619     else:
10620       raise errors.ProgrammerError("Unhandled export mode %r" %
10621                                    self.op.mode)
10622
10623     # instance disk type verification
10624     # TODO: Implement export support for file-based disks
10625     for disk in self.instance.disks:
10626       if disk.dev_type == constants.LD_FILE:
10627         raise errors.OpPrereqError("Export not supported for instances with"
10628                                    " file-based disks", errors.ECODE_INVAL)
10629
10630   def _CleanupExports(self, feedback_fn):
10631     """Removes exports of current instance from all other nodes.
10632
10633     If an instance in a cluster with nodes A..D was exported to node C, its
10634     exports will be removed from the nodes A, B and D.
10635
10636     """
10637     assert self.op.mode != constants.EXPORT_MODE_REMOTE
10638
10639     nodelist = self.cfg.GetNodeList()
10640     nodelist.remove(self.dst_node.name)
10641
10642     # on one-node clusters nodelist will be empty after the removal
10643     # if we proceed the backup would be removed because OpBackupQuery
10644     # substitutes an empty list with the full cluster node list.
10645     iname = self.instance.name
10646     if nodelist:
10647       feedback_fn("Removing old exports for instance %s" % iname)
10648       exportlist = self.rpc.call_export_list(nodelist)
10649       for node in exportlist:
10650         if exportlist[node].fail_msg:
10651           continue
10652         if iname in exportlist[node].payload:
10653           msg = self.rpc.call_export_remove(node, iname).fail_msg
10654           if msg:
10655             self.LogWarning("Could not remove older export for instance %s"
10656                             " on node %s: %s", iname, node, msg)
10657
10658   def Exec(self, feedback_fn):
10659     """Export an instance to an image in the cluster.
10660
10661     """
10662     assert self.op.mode in constants.EXPORT_MODES
10663
10664     instance = self.instance
10665     src_node = instance.primary_node
10666
10667     if self.op.shutdown:
10668       # shutdown the instance, but not the disks
10669       feedback_fn("Shutting down instance %s" % instance.name)
10670       result = self.rpc.call_instance_shutdown(src_node, instance,
10671                                                self.op.shutdown_timeout)
10672       # TODO: Maybe ignore failures if ignore_remove_failures is set
10673       result.Raise("Could not shutdown instance %s on"
10674                    " node %s" % (instance.name, src_node))
10675
10676     # set the disks ID correctly since call_instance_start needs the
10677     # correct drbd minor to create the symlinks
10678     for disk in instance.disks:
10679       self.cfg.SetDiskID(disk, src_node)
10680
10681     activate_disks = (not instance.admin_up)
10682
10683     if activate_disks:
10684       # Activate the instance disks if we'exporting a stopped instance
10685       feedback_fn("Activating disks for %s" % instance.name)
10686       _StartInstanceDisks(self, instance, None)
10687
10688     try:
10689       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
10690                                                      instance)
10691
10692       helper.CreateSnapshots()
10693       try:
10694         if (self.op.shutdown and instance.admin_up and
10695             not self.op.remove_instance):
10696           assert not activate_disks
10697           feedback_fn("Starting instance %s" % instance.name)
10698           result = self.rpc.call_instance_start(src_node, instance, None, None)
10699           msg = result.fail_msg
10700           if msg:
10701             feedback_fn("Failed to start instance: %s" % msg)
10702             _ShutdownInstanceDisks(self, instance)
10703             raise errors.OpExecError("Could not start instance: %s" % msg)
10704
10705         if self.op.mode == constants.EXPORT_MODE_LOCAL:
10706           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
10707         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10708           connect_timeout = constants.RIE_CONNECT_TIMEOUT
10709           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10710
10711           (key_name, _, _) = self.x509_key_name
10712
10713           dest_ca_pem = \
10714             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
10715                                             self.dest_x509_ca)
10716
10717           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
10718                                                      key_name, dest_ca_pem,
10719                                                      timeouts)
10720       finally:
10721         helper.Cleanup()
10722
10723       # Check for backwards compatibility
10724       assert len(dresults) == len(instance.disks)
10725       assert compat.all(isinstance(i, bool) for i in dresults), \
10726              "Not all results are boolean: %r" % dresults
10727
10728     finally:
10729       if activate_disks:
10730         feedback_fn("Deactivating disks for %s" % instance.name)
10731         _ShutdownInstanceDisks(self, instance)
10732
10733     if not (compat.all(dresults) and fin_resu):
10734       failures = []
10735       if not fin_resu:
10736         failures.append("export finalization")
10737       if not compat.all(dresults):
10738         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
10739                                if not dsk)
10740         failures.append("disk export: disk(s) %s" % fdsk)
10741
10742       raise errors.OpExecError("Export failed, errors in %s" %
10743                                utils.CommaJoin(failures))
10744
10745     # At this point, the export was successful, we can cleanup/finish
10746
10747     # Remove instance if requested
10748     if self.op.remove_instance:
10749       feedback_fn("Removing instance %s" % instance.name)
10750       _RemoveInstance(self, feedback_fn, instance,
10751                       self.op.ignore_remove_failures)
10752
10753     if self.op.mode == constants.EXPORT_MODE_LOCAL:
10754       self._CleanupExports(feedback_fn)
10755
10756     return fin_resu, dresults
10757
10758
10759 class LUBackupRemove(NoHooksLU):
10760   """Remove exports related to the named instance.
10761
10762   """
10763   REQ_BGL = False
10764
10765   def ExpandNames(self):
10766     self.needed_locks = {}
10767     # We need all nodes to be locked in order for RemoveExport to work, but we
10768     # don't need to lock the instance itself, as nothing will happen to it (and
10769     # we can remove exports also for a removed instance)
10770     self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10771
10772   def Exec(self, feedback_fn):
10773     """Remove any export.
10774
10775     """
10776     instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
10777     # If the instance was not found we'll try with the name that was passed in.
10778     # This will only work if it was an FQDN, though.
10779     fqdn_warn = False
10780     if not instance_name:
10781       fqdn_warn = True
10782       instance_name = self.op.instance_name
10783
10784     locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
10785     exportlist = self.rpc.call_export_list(locked_nodes)
10786     found = False
10787     for node in exportlist:
10788       msg = exportlist[node].fail_msg
10789       if msg:
10790         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
10791         continue
10792       if instance_name in exportlist[node].payload:
10793         found = True
10794         result = self.rpc.call_export_remove(node, instance_name)
10795         msg = result.fail_msg
10796         if msg:
10797           logging.error("Could not remove export for instance %s"
10798                         " on node %s: %s", instance_name, node, msg)
10799
10800     if fqdn_warn and not found:
10801       feedback_fn("Export not found. If trying to remove an export belonging"
10802                   " to a deleted instance please use its Fully Qualified"
10803                   " Domain Name.")
10804
10805
10806 class LUGroupAdd(LogicalUnit):
10807   """Logical unit for creating node groups.
10808
10809   """
10810   HPATH = "group-add"
10811   HTYPE = constants.HTYPE_GROUP
10812   REQ_BGL = False
10813
10814   def ExpandNames(self):
10815     # We need the new group's UUID here so that we can create and acquire the
10816     # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
10817     # that it should not check whether the UUID exists in the configuration.
10818     self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
10819     self.needed_locks = {}
10820     self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10821
10822   def CheckPrereq(self):
10823     """Check prerequisites.
10824
10825     This checks that the given group name is not an existing node group
10826     already.
10827
10828     """
10829     try:
10830       existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10831     except errors.OpPrereqError:
10832       pass
10833     else:
10834       raise errors.OpPrereqError("Desired group name '%s' already exists as a"
10835                                  " node group (UUID: %s)" %
10836                                  (self.op.group_name, existing_uuid),
10837                                  errors.ECODE_EXISTS)
10838
10839     if self.op.ndparams:
10840       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10841
10842   def BuildHooksEnv(self):
10843     """Build hooks env.
10844
10845     """
10846     return {
10847       "GROUP_NAME": self.op.group_name,
10848       }
10849
10850   def BuildHooksNodes(self):
10851     """Build hooks nodes.
10852
10853     """
10854     mn = self.cfg.GetMasterNode()
10855     return ([mn], [mn])
10856
10857   def Exec(self, feedback_fn):
10858     """Add the node group to the cluster.
10859
10860     """
10861     group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
10862                                   uuid=self.group_uuid,
10863                                   alloc_policy=self.op.alloc_policy,
10864                                   ndparams=self.op.ndparams)
10865
10866     self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
10867     del self.remove_locks[locking.LEVEL_NODEGROUP]
10868
10869
10870 class LUGroupAssignNodes(NoHooksLU):
10871   """Logical unit for assigning nodes to groups.
10872
10873   """
10874   REQ_BGL = False
10875
10876   def ExpandNames(self):
10877     # These raise errors.OpPrereqError on their own:
10878     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10879     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
10880
10881     # We want to lock all the affected nodes and groups. We have readily
10882     # available the list of nodes, and the *destination* group. To gather the
10883     # list of "source" groups, we need to fetch node information.
10884     self.node_data = self.cfg.GetAllNodesInfo()
10885     affected_groups = set(self.node_data[node].group for node in self.op.nodes)
10886     affected_groups.add(self.group_uuid)
10887
10888     self.needed_locks = {
10889       locking.LEVEL_NODEGROUP: list(affected_groups),
10890       locking.LEVEL_NODE: self.op.nodes,
10891       }
10892
10893   def CheckPrereq(self):
10894     """Check prerequisites.
10895
10896     """
10897     self.group = self.cfg.GetNodeGroup(self.group_uuid)
10898     instance_data = self.cfg.GetAllInstancesInfo()
10899
10900     if self.group is None:
10901       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10902                                (self.op.group_name, self.group_uuid))
10903
10904     (new_splits, previous_splits) = \
10905       self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
10906                                              for node in self.op.nodes],
10907                                             self.node_data, instance_data)
10908
10909     if new_splits:
10910       fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
10911
10912       if not self.op.force:
10913         raise errors.OpExecError("The following instances get split by this"
10914                                  " change and --force was not given: %s" %
10915                                  fmt_new_splits)
10916       else:
10917         self.LogWarning("This operation will split the following instances: %s",
10918                         fmt_new_splits)
10919
10920         if previous_splits:
10921           self.LogWarning("In addition, these already-split instances continue"
10922                           " to be split across groups: %s",
10923                           utils.CommaJoin(utils.NiceSort(previous_splits)))
10924
10925   def Exec(self, feedback_fn):
10926     """Assign nodes to a new group.
10927
10928     """
10929     for node in self.op.nodes:
10930       self.node_data[node].group = self.group_uuid
10931
10932     self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
10933
10934   @staticmethod
10935   def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
10936     """Check for split instances after a node assignment.
10937
10938     This method considers a series of node assignments as an atomic operation,
10939     and returns information about split instances after applying the set of
10940     changes.
10941
10942     In particular, it returns information about newly split instances, and
10943     instances that were already split, and remain so after the change.
10944
10945     Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
10946     considered.
10947
10948     @type changes: list of (node_name, new_group_uuid) pairs.
10949     @param changes: list of node assignments to consider.
10950     @param node_data: a dict with data for all nodes
10951     @param instance_data: a dict with all instances to consider
10952     @rtype: a two-tuple
10953     @return: a list of instances that were previously okay and result split as a
10954       consequence of this change, and a list of instances that were previously
10955       split and this change does not fix.
10956
10957     """
10958     changed_nodes = dict((node, group) for node, group in changes
10959                          if node_data[node].group != group)
10960
10961     all_split_instances = set()
10962     previously_split_instances = set()
10963
10964     def InstanceNodes(instance):
10965       return [instance.primary_node] + list(instance.secondary_nodes)
10966
10967     for inst in instance_data.values():
10968       if inst.disk_template not in constants.DTS_INT_MIRROR:
10969         continue
10970
10971       instance_nodes = InstanceNodes(inst)
10972
10973       if len(set(node_data[node].group for node in instance_nodes)) > 1:
10974         previously_split_instances.add(inst.name)
10975
10976       if len(set(changed_nodes.get(node, node_data[node].group)
10977                  for node in instance_nodes)) > 1:
10978         all_split_instances.add(inst.name)
10979
10980     return (list(all_split_instances - previously_split_instances),
10981             list(previously_split_instances & all_split_instances))
10982
10983
10984 class _GroupQuery(_QueryBase):
10985   FIELDS = query.GROUP_FIELDS
10986
10987   def ExpandNames(self, lu):
10988     lu.needed_locks = {}
10989
10990     self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
10991     name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
10992
10993     if not self.names:
10994       self.wanted = [name_to_uuid[name]
10995                      for name in utils.NiceSort(name_to_uuid.keys())]
10996     else:
10997       # Accept names to be either names or UUIDs.
10998       missing = []
10999       self.wanted = []
11000       all_uuid = frozenset(self._all_groups.keys())
11001
11002       for name in self.names:
11003         if name in all_uuid:
11004           self.wanted.append(name)
11005         elif name in name_to_uuid:
11006           self.wanted.append(name_to_uuid[name])
11007         else:
11008           missing.append(name)
11009
11010       if missing:
11011         raise errors.OpPrereqError("Some groups do not exist: %s" %
11012                                    utils.CommaJoin(missing),
11013                                    errors.ECODE_NOENT)
11014
11015   def DeclareLocks(self, lu, level):
11016     pass
11017
11018   def _GetQueryData(self, lu):
11019     """Computes the list of node groups and their attributes.
11020
11021     """
11022     do_nodes = query.GQ_NODE in self.requested_data
11023     do_instances = query.GQ_INST in self.requested_data
11024
11025     group_to_nodes = None
11026     group_to_instances = None
11027
11028     # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
11029     # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
11030     # latter GetAllInstancesInfo() is not enough, for we have to go through
11031     # instance->node. Hence, we will need to process nodes even if we only need
11032     # instance information.
11033     if do_nodes or do_instances:
11034       all_nodes = lu.cfg.GetAllNodesInfo()
11035       group_to_nodes = dict((uuid, []) for uuid in self.wanted)
11036       node_to_group = {}
11037
11038       for node in all_nodes.values():
11039         if node.group in group_to_nodes:
11040           group_to_nodes[node.group].append(node.name)
11041           node_to_group[node.name] = node.group
11042
11043       if do_instances:
11044         all_instances = lu.cfg.GetAllInstancesInfo()
11045         group_to_instances = dict((uuid, []) for uuid in self.wanted)
11046
11047         for instance in all_instances.values():
11048           node = instance.primary_node
11049           if node in node_to_group:
11050             group_to_instances[node_to_group[node]].append(instance.name)
11051
11052         if not do_nodes:
11053           # Do not pass on node information if it was not requested.
11054           group_to_nodes = None
11055
11056     return query.GroupQueryData([self._all_groups[uuid]
11057                                  for uuid in self.wanted],
11058                                 group_to_nodes, group_to_instances)
11059
11060
11061 class LUGroupQuery(NoHooksLU):
11062   """Logical unit for querying node groups.
11063
11064   """
11065   REQ_BGL = False
11066
11067   def CheckArguments(self):
11068     self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
11069                           self.op.output_fields, False)
11070
11071   def ExpandNames(self):
11072     self.gq.ExpandNames(self)
11073
11074   def Exec(self, feedback_fn):
11075     return self.gq.OldStyleQuery(self)
11076
11077
11078 class LUGroupSetParams(LogicalUnit):
11079   """Modifies the parameters of a node group.
11080
11081   """
11082   HPATH = "group-modify"
11083   HTYPE = constants.HTYPE_GROUP
11084   REQ_BGL = False
11085
11086   def CheckArguments(self):
11087     all_changes = [
11088       self.op.ndparams,
11089       self.op.alloc_policy,
11090       ]
11091
11092     if all_changes.count(None) == len(all_changes):
11093       raise errors.OpPrereqError("Please pass at least one modification",
11094                                  errors.ECODE_INVAL)
11095
11096   def ExpandNames(self):
11097     # This raises errors.OpPrereqError on its own:
11098     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11099
11100     self.needed_locks = {
11101       locking.LEVEL_NODEGROUP: [self.group_uuid],
11102       }
11103
11104   def CheckPrereq(self):
11105     """Check prerequisites.
11106
11107     """
11108     self.group = self.cfg.GetNodeGroup(self.group_uuid)
11109
11110     if self.group is None:
11111       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11112                                (self.op.group_name, self.group_uuid))
11113
11114     if self.op.ndparams:
11115       new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
11116       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11117       self.new_ndparams = new_ndparams
11118
11119   def BuildHooksEnv(self):
11120     """Build hooks env.
11121
11122     """
11123     return {
11124       "GROUP_NAME": self.op.group_name,
11125       "NEW_ALLOC_POLICY": self.op.alloc_policy,
11126       }
11127
11128   def BuildHooksNodes(self):
11129     """Build hooks nodes.
11130
11131     """
11132     mn = self.cfg.GetMasterNode()
11133     return ([mn], [mn])
11134
11135   def Exec(self, feedback_fn):
11136     """Modifies the node group.
11137
11138     """
11139     result = []
11140
11141     if self.op.ndparams:
11142       self.group.ndparams = self.new_ndparams
11143       result.append(("ndparams", str(self.group.ndparams)))
11144
11145     if self.op.alloc_policy:
11146       self.group.alloc_policy = self.op.alloc_policy
11147
11148     self.cfg.Update(self.group, feedback_fn)
11149     return result
11150
11151
11152
11153 class LUGroupRemove(LogicalUnit):
11154   HPATH = "group-remove"
11155   HTYPE = constants.HTYPE_GROUP
11156   REQ_BGL = False
11157
11158   def ExpandNames(self):
11159     # This will raises errors.OpPrereqError on its own:
11160     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11161     self.needed_locks = {
11162       locking.LEVEL_NODEGROUP: [self.group_uuid],
11163       }
11164
11165   def CheckPrereq(self):
11166     """Check prerequisites.
11167
11168     This checks that the given group name exists as a node group, that is
11169     empty (i.e., contains no nodes), and that is not the last group of the
11170     cluster.
11171
11172     """
11173     # Verify that the group is empty.
11174     group_nodes = [node.name
11175                    for node in self.cfg.GetAllNodesInfo().values()
11176                    if node.group == self.group_uuid]
11177
11178     if group_nodes:
11179       raise errors.OpPrereqError("Group '%s' not empty, has the following"
11180                                  " nodes: %s" %
11181                                  (self.op.group_name,
11182                                   utils.CommaJoin(utils.NiceSort(group_nodes))),
11183                                  errors.ECODE_STATE)
11184
11185     # Verify the cluster would not be left group-less.
11186     if len(self.cfg.GetNodeGroupList()) == 1:
11187       raise errors.OpPrereqError("Group '%s' is the only group,"
11188                                  " cannot be removed" %
11189                                  self.op.group_name,
11190                                  errors.ECODE_STATE)
11191
11192   def BuildHooksEnv(self):
11193     """Build hooks env.
11194
11195     """
11196     return {
11197       "GROUP_NAME": self.op.group_name,
11198       }
11199
11200   def BuildHooksNodes(self):
11201     """Build hooks nodes.
11202
11203     """
11204     mn = self.cfg.GetMasterNode()
11205     return ([mn], [mn])
11206
11207   def Exec(self, feedback_fn):
11208     """Remove the node group.
11209
11210     """
11211     try:
11212       self.cfg.RemoveNodeGroup(self.group_uuid)
11213     except errors.ConfigurationError:
11214       raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
11215                                (self.op.group_name, self.group_uuid))
11216
11217     self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11218
11219
11220 class LUGroupRename(LogicalUnit):
11221   HPATH = "group-rename"
11222   HTYPE = constants.HTYPE_GROUP
11223   REQ_BGL = False
11224
11225   def ExpandNames(self):
11226     # This raises errors.OpPrereqError on its own:
11227     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11228
11229     self.needed_locks = {
11230       locking.LEVEL_NODEGROUP: [self.group_uuid],
11231       }
11232
11233   def CheckPrereq(self):
11234     """Check prerequisites.
11235
11236     Ensures requested new name is not yet used.
11237
11238     """
11239     try:
11240       new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
11241     except errors.OpPrereqError:
11242       pass
11243     else:
11244       raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
11245                                  " node group (UUID: %s)" %
11246                                  (self.op.new_name, new_name_uuid),
11247                                  errors.ECODE_EXISTS)
11248
11249   def BuildHooksEnv(self):
11250     """Build hooks env.
11251
11252     """
11253     return {
11254       "OLD_NAME": self.op.group_name,
11255       "NEW_NAME": self.op.new_name,
11256       }
11257
11258   def BuildHooksNodes(self):
11259     """Build hooks nodes.
11260
11261     """
11262     mn = self.cfg.GetMasterNode()
11263
11264     all_nodes = self.cfg.GetAllNodesInfo()
11265     all_nodes.pop(mn, None)
11266
11267     run_nodes = [mn]
11268     run_nodes.extend(node.name for node in all_nodes.values()
11269                      if node.group == self.group_uuid)
11270
11271     return (run_nodes, run_nodes)
11272
11273   def Exec(self, feedback_fn):
11274     """Rename the node group.
11275
11276     """
11277     group = self.cfg.GetNodeGroup(self.group_uuid)
11278
11279     if group is None:
11280       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11281                                (self.op.group_name, self.group_uuid))
11282
11283     group.name = self.op.new_name
11284     self.cfg.Update(group, feedback_fn)
11285
11286     return self.op.new_name
11287
11288
11289 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
11290   """Generic tags LU.
11291
11292   This is an abstract class which is the parent of all the other tags LUs.
11293
11294   """
11295   def ExpandNames(self):
11296     self.group_uuid = None
11297     self.needed_locks = {}
11298     if self.op.kind == constants.TAG_NODE:
11299       self.op.name = _ExpandNodeName(self.cfg, self.op.name)
11300       self.needed_locks[locking.LEVEL_NODE] = self.op.name
11301     elif self.op.kind == constants.TAG_INSTANCE:
11302       self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
11303       self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
11304     elif self.op.kind == constants.TAG_NODEGROUP:
11305       self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
11306
11307     # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
11308     # not possible to acquire the BGL based on opcode parameters)
11309
11310   def CheckPrereq(self):
11311     """Check prerequisites.
11312
11313     """
11314     if self.op.kind == constants.TAG_CLUSTER:
11315       self.target = self.cfg.GetClusterInfo()
11316     elif self.op.kind == constants.TAG_NODE:
11317       self.target = self.cfg.GetNodeInfo(self.op.name)
11318     elif self.op.kind == constants.TAG_INSTANCE:
11319       self.target = self.cfg.GetInstanceInfo(self.op.name)
11320     elif self.op.kind == constants.TAG_NODEGROUP:
11321       self.target = self.cfg.GetNodeGroup(self.group_uuid)
11322     else:
11323       raise errors.OpPrereqError("Wrong tag type requested (%s)" %
11324                                  str(self.op.kind), errors.ECODE_INVAL)
11325
11326
11327 class LUTagsGet(TagsLU):
11328   """Returns the tags of a given object.
11329
11330   """
11331   REQ_BGL = False
11332
11333   def ExpandNames(self):
11334     TagsLU.ExpandNames(self)
11335
11336     # Share locks as this is only a read operation
11337     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
11338
11339   def Exec(self, feedback_fn):
11340     """Returns the tag list.
11341
11342     """
11343     return list(self.target.GetTags())
11344
11345
11346 class LUTagsSearch(NoHooksLU):
11347   """Searches the tags for a given pattern.
11348
11349   """
11350   REQ_BGL = False
11351
11352   def ExpandNames(self):
11353     self.needed_locks = {}
11354
11355   def CheckPrereq(self):
11356     """Check prerequisites.
11357
11358     This checks the pattern passed for validity by compiling it.
11359
11360     """
11361     try:
11362       self.re = re.compile(self.op.pattern)
11363     except re.error, err:
11364       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
11365                                  (self.op.pattern, err), errors.ECODE_INVAL)
11366
11367   def Exec(self, feedback_fn):
11368     """Returns the tag list.
11369
11370     """
11371     cfg = self.cfg
11372     tgts = [("/cluster", cfg.GetClusterInfo())]
11373     ilist = cfg.GetAllInstancesInfo().values()
11374     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
11375     nlist = cfg.GetAllNodesInfo().values()
11376     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
11377     tgts.extend(("/nodegroup/%s" % n.name, n)
11378                 for n in cfg.GetAllNodeGroupsInfo().values())
11379     results = []
11380     for path, target in tgts:
11381       for tag in target.GetTags():
11382         if self.re.search(tag):
11383           results.append((path, tag))
11384     return results
11385
11386
11387 class LUTagsSet(TagsLU):
11388   """Sets a tag on a given object.
11389
11390   """
11391   REQ_BGL = False
11392
11393   def CheckPrereq(self):
11394     """Check prerequisites.
11395
11396     This checks the type and length of the tag name and value.
11397
11398     """
11399     TagsLU.CheckPrereq(self)
11400     for tag in self.op.tags:
11401       objects.TaggableObject.ValidateTag(tag)
11402
11403   def Exec(self, feedback_fn):
11404     """Sets the tag.
11405
11406     """
11407     try:
11408       for tag in self.op.tags:
11409         self.target.AddTag(tag)
11410     except errors.TagError, err:
11411       raise errors.OpExecError("Error while setting tag: %s" % str(err))
11412     self.cfg.Update(self.target, feedback_fn)
11413
11414
11415 class LUTagsDel(TagsLU):
11416   """Delete a list of tags from a given object.
11417
11418   """
11419   REQ_BGL = False
11420
11421   def CheckPrereq(self):
11422     """Check prerequisites.
11423
11424     This checks that we have the given tag.
11425
11426     """
11427     TagsLU.CheckPrereq(self)
11428     for tag in self.op.tags:
11429       objects.TaggableObject.ValidateTag(tag)
11430     del_tags = frozenset(self.op.tags)
11431     cur_tags = self.target.GetTags()
11432
11433     diff_tags = del_tags - cur_tags
11434     if diff_tags:
11435       diff_names = ("'%s'" % i for i in sorted(diff_tags))
11436       raise errors.OpPrereqError("Tag(s) %s not found" %
11437                                  (utils.CommaJoin(diff_names), ),
11438                                  errors.ECODE_NOENT)
11439
11440   def Exec(self, feedback_fn):
11441     """Remove the tag from the object.
11442
11443     """
11444     for tag in self.op.tags:
11445       self.target.RemoveTag(tag)
11446     self.cfg.Update(self.target, feedback_fn)
11447
11448
11449 class LUTestDelay(NoHooksLU):
11450   """Sleep for a specified amount of time.
11451
11452   This LU sleeps on the master and/or nodes for a specified amount of
11453   time.
11454
11455   """
11456   REQ_BGL = False
11457
11458   def ExpandNames(self):
11459     """Expand names and set required locks.
11460
11461     This expands the node list, if any.
11462
11463     """
11464     self.needed_locks = {}
11465     if self.op.on_nodes:
11466       # _GetWantedNodes can be used here, but is not always appropriate to use
11467       # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
11468       # more information.
11469       self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
11470       self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
11471
11472   def _TestDelay(self):
11473     """Do the actual sleep.
11474
11475     """
11476     if self.op.on_master:
11477       if not utils.TestDelay(self.op.duration):
11478         raise errors.OpExecError("Error during master delay test")
11479     if self.op.on_nodes:
11480       result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
11481       for node, node_result in result.items():
11482         node_result.Raise("Failure during rpc call to node %s" % node)
11483
11484   def Exec(self, feedback_fn):
11485     """Execute the test delay opcode, with the wanted repetitions.
11486
11487     """
11488     if self.op.repeat == 0:
11489       self._TestDelay()
11490     else:
11491       top_value = self.op.repeat - 1
11492       for i in range(self.op.repeat):
11493         self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
11494         self._TestDelay()
11495
11496
11497 class LUTestJqueue(NoHooksLU):
11498   """Utility LU to test some aspects of the job queue.
11499
11500   """
11501   REQ_BGL = False
11502
11503   # Must be lower than default timeout for WaitForJobChange to see whether it
11504   # notices changed jobs
11505   _CLIENT_CONNECT_TIMEOUT = 20.0
11506   _CLIENT_CONFIRM_TIMEOUT = 60.0
11507
11508   @classmethod
11509   def _NotifyUsingSocket(cls, cb, errcls):
11510     """Opens a Unix socket and waits for another program to connect.
11511
11512     @type cb: callable
11513     @param cb: Callback to send socket name to client
11514     @type errcls: class
11515     @param errcls: Exception class to use for errors
11516
11517     """
11518     # Using a temporary directory as there's no easy way to create temporary
11519     # sockets without writing a custom loop around tempfile.mktemp and
11520     # socket.bind
11521     tmpdir = tempfile.mkdtemp()
11522     try:
11523       tmpsock = utils.PathJoin(tmpdir, "sock")
11524
11525       logging.debug("Creating temporary socket at %s", tmpsock)
11526       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
11527       try:
11528         sock.bind(tmpsock)
11529         sock.listen(1)
11530
11531         # Send details to client
11532         cb(tmpsock)
11533
11534         # Wait for client to connect before continuing
11535         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
11536         try:
11537           (conn, _) = sock.accept()
11538         except socket.error, err:
11539           raise errcls("Client didn't connect in time (%s)" % err)
11540       finally:
11541         sock.close()
11542     finally:
11543       # Remove as soon as client is connected
11544       shutil.rmtree(tmpdir)
11545
11546     # Wait for client to close
11547     try:
11548       try:
11549         # pylint: disable-msg=E1101
11550         # Instance of '_socketobject' has no ... member
11551         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
11552         conn.recv(1)
11553       except socket.error, err:
11554         raise errcls("Client failed to confirm notification (%s)" % err)
11555     finally:
11556       conn.close()
11557
11558   def _SendNotification(self, test, arg, sockname):
11559     """Sends a notification to the client.
11560
11561     @type test: string
11562     @param test: Test name
11563     @param arg: Test argument (depends on test)
11564     @type sockname: string
11565     @param sockname: Socket path
11566
11567     """
11568     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
11569
11570   def _Notify(self, prereq, test, arg):
11571     """Notifies the client of a test.
11572
11573     @type prereq: bool
11574     @param prereq: Whether this is a prereq-phase test
11575     @type test: string
11576     @param test: Test name
11577     @param arg: Test argument (depends on test)
11578
11579     """
11580     if prereq:
11581       errcls = errors.OpPrereqError
11582     else:
11583       errcls = errors.OpExecError
11584
11585     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
11586                                                   test, arg),
11587                                    errcls)
11588
11589   def CheckArguments(self):
11590     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
11591     self.expandnames_calls = 0
11592
11593   def ExpandNames(self):
11594     checkargs_calls = getattr(self, "checkargs_calls", 0)
11595     if checkargs_calls < 1:
11596       raise errors.ProgrammerError("CheckArguments was not called")
11597
11598     self.expandnames_calls += 1
11599
11600     if self.op.notify_waitlock:
11601       self._Notify(True, constants.JQT_EXPANDNAMES, None)
11602
11603     self.LogInfo("Expanding names")
11604
11605     # Get lock on master node (just to get a lock, not for a particular reason)
11606     self.needed_locks = {
11607       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
11608       }
11609
11610   def Exec(self, feedback_fn):
11611     if self.expandnames_calls < 1:
11612       raise errors.ProgrammerError("ExpandNames was not called")
11613
11614     if self.op.notify_exec:
11615       self._Notify(False, constants.JQT_EXEC, None)
11616
11617     self.LogInfo("Executing")
11618
11619     if self.op.log_messages:
11620       self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
11621       for idx, msg in enumerate(self.op.log_messages):
11622         self.LogInfo("Sending log message %s", idx + 1)
11623         feedback_fn(constants.JQT_MSGPREFIX + msg)
11624         # Report how many test messages have been sent
11625         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
11626
11627     if self.op.fail:
11628       raise errors.OpExecError("Opcode failure was requested")
11629
11630     return True
11631
11632
11633 class IAllocator(object):
11634   """IAllocator framework.
11635
11636   An IAllocator instance has three sets of attributes:
11637     - cfg that is needed to query the cluster
11638     - input data (all members of the _KEYS class attribute are required)
11639     - four buffer attributes (in|out_data|text), that represent the
11640       input (to the external script) in text and data structure format,
11641       and the output from it, again in two formats
11642     - the result variables from the script (success, info, nodes) for
11643       easy usage
11644
11645   """
11646   # pylint: disable-msg=R0902
11647   # lots of instance attributes
11648   _ALLO_KEYS = [
11649     "name", "mem_size", "disks", "disk_template",
11650     "os", "tags", "nics", "vcpus", "hypervisor",
11651     ]
11652   _RELO_KEYS = [
11653     "name", "relocate_from",
11654     ]
11655   _EVAC_KEYS = [
11656     "evac_nodes",
11657     ]
11658
11659   def __init__(self, cfg, rpc, mode, **kwargs):
11660     self.cfg = cfg
11661     self.rpc = rpc
11662     # init buffer variables
11663     self.in_text = self.out_text = self.in_data = self.out_data = None
11664     # init all input fields so that pylint is happy
11665     self.mode = mode
11666     self.mem_size = self.disks = self.disk_template = None
11667     self.os = self.tags = self.nics = self.vcpus = None
11668     self.hypervisor = None
11669     self.relocate_from = None
11670     self.name = None
11671     self.evac_nodes = None
11672     # computed fields
11673     self.required_nodes = None
11674     # init result fields
11675     self.success = self.info = self.result = None
11676     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
11677       keyset = self._ALLO_KEYS
11678       fn = self._AddNewInstance
11679     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
11680       keyset = self._RELO_KEYS
11681       fn = self._AddRelocateInstance
11682     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
11683       keyset = self._EVAC_KEYS
11684       fn = self._AddEvacuateNodes
11685     else:
11686       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
11687                                    " IAllocator" % self.mode)
11688     for key in kwargs:
11689       if key not in keyset:
11690         raise errors.ProgrammerError("Invalid input parameter '%s' to"
11691                                      " IAllocator" % key)
11692       setattr(self, key, kwargs[key])
11693
11694     for key in keyset:
11695       if key not in kwargs:
11696         raise errors.ProgrammerError("Missing input parameter '%s' to"
11697                                      " IAllocator" % key)
11698     self._BuildInputData(fn)
11699
11700   def _ComputeClusterData(self):
11701     """Compute the generic allocator input data.
11702
11703     This is the data that is independent of the actual operation.
11704
11705     """
11706     cfg = self.cfg
11707     cluster_info = cfg.GetClusterInfo()
11708     # cluster data
11709     data = {
11710       "version": constants.IALLOCATOR_VERSION,
11711       "cluster_name": cfg.GetClusterName(),
11712       "cluster_tags": list(cluster_info.GetTags()),
11713       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
11714       # we don't have job IDs
11715       }
11716     ninfo = cfg.GetAllNodesInfo()
11717     iinfo = cfg.GetAllInstancesInfo().values()
11718     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
11719
11720     # node data
11721     node_list = [n.name for n in ninfo.values() if n.vm_capable]
11722
11723     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
11724       hypervisor_name = self.hypervisor
11725     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
11726       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
11727     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
11728       hypervisor_name = cluster_info.enabled_hypervisors[0]
11729
11730     node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
11731                                         hypervisor_name)
11732     node_iinfo = \
11733       self.rpc.call_all_instances_info(node_list,
11734                                        cluster_info.enabled_hypervisors)
11735
11736     data["nodegroups"] = self._ComputeNodeGroupData(cfg)
11737
11738     config_ndata = self._ComputeBasicNodeData(ninfo)
11739     data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
11740                                                  i_list, config_ndata)
11741     assert len(data["nodes"]) == len(ninfo), \
11742         "Incomplete node data computed"
11743
11744     data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
11745
11746     self.in_data = data
11747
11748   @staticmethod
11749   def _ComputeNodeGroupData(cfg):
11750     """Compute node groups data.
11751
11752     """
11753     ng = {}
11754     for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
11755       ng[guuid] = {
11756         "name": gdata.name,
11757         "alloc_policy": gdata.alloc_policy,
11758         }
11759     return ng
11760
11761   @staticmethod
11762   def _ComputeBasicNodeData(node_cfg):
11763     """Compute global node data.
11764
11765     @rtype: dict
11766     @returns: a dict of name: (node dict, node config)
11767
11768     """
11769     node_results = {}
11770     for ninfo in node_cfg.values():
11771       # fill in static (config-based) values
11772       pnr = {
11773         "tags": list(ninfo.GetTags()),
11774         "primary_ip": ninfo.primary_ip,
11775         "secondary_ip": ninfo.secondary_ip,
11776         "offline": ninfo.offline,
11777         "drained": ninfo.drained,
11778         "master_candidate": ninfo.master_candidate,
11779         "group": ninfo.group,
11780         "master_capable": ninfo.master_capable,
11781         "vm_capable": ninfo.vm_capable,
11782         }
11783
11784       node_results[ninfo.name] = pnr
11785
11786     return node_results
11787
11788   @staticmethod
11789   def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
11790                               node_results):
11791     """Compute global node data.
11792
11793     @param node_results: the basic node structures as filled from the config
11794
11795     """
11796     # make a copy of the current dict
11797     node_results = dict(node_results)
11798     for nname, nresult in node_data.items():
11799       assert nname in node_results, "Missing basic data for node %s" % nname
11800       ninfo = node_cfg[nname]
11801
11802       if not (ninfo.offline or ninfo.drained):
11803         nresult.Raise("Can't get data for node %s" % nname)
11804         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
11805                                 nname)
11806         remote_info = nresult.payload
11807
11808         for attr in ['memory_total', 'memory_free', 'memory_dom0',
11809                      'vg_size', 'vg_free', 'cpu_total']:
11810           if attr not in remote_info:
11811             raise errors.OpExecError("Node '%s' didn't return attribute"
11812                                      " '%s'" % (nname, attr))
11813           if not isinstance(remote_info[attr], int):
11814             raise errors.OpExecError("Node '%s' returned invalid value"
11815                                      " for '%s': %s" %
11816                                      (nname, attr, remote_info[attr]))
11817         # compute memory used by primary instances
11818         i_p_mem = i_p_up_mem = 0
11819         for iinfo, beinfo in i_list:
11820           if iinfo.primary_node == nname:
11821             i_p_mem += beinfo[constants.BE_MEMORY]
11822             if iinfo.name not in node_iinfo[nname].payload:
11823               i_used_mem = 0
11824             else:
11825               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
11826             i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
11827             remote_info['memory_free'] -= max(0, i_mem_diff)
11828
11829             if iinfo.admin_up:
11830               i_p_up_mem += beinfo[constants.BE_MEMORY]
11831
11832         # compute memory used by instances
11833         pnr_dyn = {
11834           "total_memory": remote_info['memory_total'],
11835           "reserved_memory": remote_info['memory_dom0'],
11836           "free_memory": remote_info['memory_free'],
11837           "total_disk": remote_info['vg_size'],
11838           "free_disk": remote_info['vg_free'],
11839           "total_cpus": remote_info['cpu_total'],
11840           "i_pri_memory": i_p_mem,
11841           "i_pri_up_memory": i_p_up_mem,
11842           }
11843         pnr_dyn.update(node_results[nname])
11844         node_results[nname] = pnr_dyn
11845
11846     return node_results
11847
11848   @staticmethod
11849   def _ComputeInstanceData(cluster_info, i_list):
11850     """Compute global instance data.
11851
11852     """
11853     instance_data = {}
11854     for iinfo, beinfo in i_list:
11855       nic_data = []
11856       for nic in iinfo.nics:
11857         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
11858         nic_dict = {"mac": nic.mac,
11859                     "ip": nic.ip,
11860                     "mode": filled_params[constants.NIC_MODE],
11861                     "link": filled_params[constants.NIC_LINK],
11862                    }
11863         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
11864           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
11865         nic_data.append(nic_dict)
11866       pir = {
11867         "tags": list(iinfo.GetTags()),
11868         "admin_up": iinfo.admin_up,
11869         "vcpus": beinfo[constants.BE_VCPUS],
11870         "memory": beinfo[constants.BE_MEMORY],
11871         "os": iinfo.os,
11872         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
11873         "nics": nic_data,
11874         "disks": [{constants.IDISK_SIZE: dsk.size,
11875                    constants.IDISK_MODE: dsk.mode}
11876                   for dsk in iinfo.disks],
11877         "disk_template": iinfo.disk_template,
11878         "hypervisor": iinfo.hypervisor,
11879         }
11880       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
11881                                                  pir["disks"])
11882       instance_data[iinfo.name] = pir
11883
11884     return instance_data
11885
11886   def _AddNewInstance(self):
11887     """Add new instance data to allocator structure.
11888
11889     This in combination with _AllocatorGetClusterData will create the
11890     correct structure needed as input for the allocator.
11891
11892     The checks for the completeness of the opcode must have already been
11893     done.
11894
11895     """
11896     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
11897
11898     if self.disk_template in constants.DTS_INT_MIRROR:
11899       self.required_nodes = 2
11900     else:
11901       self.required_nodes = 1
11902     request = {
11903       "name": self.name,
11904       "disk_template": self.disk_template,
11905       "tags": self.tags,
11906       "os": self.os,
11907       "vcpus": self.vcpus,
11908       "memory": self.mem_size,
11909       "disks": self.disks,
11910       "disk_space_total": disk_space,
11911       "nics": self.nics,
11912       "required_nodes": self.required_nodes,
11913       }
11914     return request
11915
11916   def _AddRelocateInstance(self):
11917     """Add relocate instance data to allocator structure.
11918
11919     This in combination with _IAllocatorGetClusterData will create the
11920     correct structure needed as input for the allocator.
11921
11922     The checks for the completeness of the opcode must have already been
11923     done.
11924
11925     """
11926     instance = self.cfg.GetInstanceInfo(self.name)
11927     if instance is None:
11928       raise errors.ProgrammerError("Unknown instance '%s' passed to"
11929                                    " IAllocator" % self.name)
11930
11931     if instance.disk_template not in constants.DTS_MIRRORED:
11932       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
11933                                  errors.ECODE_INVAL)
11934
11935     if instance.disk_template in constants.DTS_INT_MIRROR and \
11936         len(instance.secondary_nodes) != 1:
11937       raise errors.OpPrereqError("Instance has not exactly one secondary node",
11938                                  errors.ECODE_STATE)
11939
11940     self.required_nodes = 1
11941     disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
11942     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
11943
11944     request = {
11945       "name": self.name,
11946       "disk_space_total": disk_space,
11947       "required_nodes": self.required_nodes,
11948       "relocate_from": self.relocate_from,
11949       }
11950     return request
11951
11952   def _AddEvacuateNodes(self):
11953     """Add evacuate nodes data to allocator structure.
11954
11955     """
11956     request = {
11957       "evac_nodes": self.evac_nodes
11958       }
11959     return request
11960
11961   def _BuildInputData(self, fn):
11962     """Build input data structures.
11963
11964     """
11965     self._ComputeClusterData()
11966
11967     request = fn()
11968     request["type"] = self.mode
11969     self.in_data["request"] = request
11970
11971     self.in_text = serializer.Dump(self.in_data)
11972
11973   def Run(self, name, validate=True, call_fn=None):
11974     """Run an instance allocator and return the results.
11975
11976     """
11977     if call_fn is None:
11978       call_fn = self.rpc.call_iallocator_runner
11979
11980     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
11981     result.Raise("Failure while running the iallocator script")
11982
11983     self.out_text = result.payload
11984     if validate:
11985       self._ValidateResult()
11986
11987   def _ValidateResult(self):
11988     """Process the allocator results.
11989
11990     This will process and if successful save the result in
11991     self.out_data and the other parameters.
11992
11993     """
11994     try:
11995       rdict = serializer.Load(self.out_text)
11996     except Exception, err:
11997       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
11998
11999     if not isinstance(rdict, dict):
12000       raise errors.OpExecError("Can't parse iallocator results: not a dict")
12001
12002     # TODO: remove backwards compatiblity in later versions
12003     if "nodes" in rdict and "result" not in rdict:
12004       rdict["result"] = rdict["nodes"]
12005       del rdict["nodes"]
12006
12007     for key in "success", "info", "result":
12008       if key not in rdict:
12009         raise errors.OpExecError("Can't parse iallocator results:"
12010                                  " missing key '%s'" % key)
12011       setattr(self, key, rdict[key])
12012
12013     if not isinstance(rdict["result"], list):
12014       raise errors.OpExecError("Can't parse iallocator results: 'result' key"
12015                                " is not a list")
12016
12017     if self.mode == constants.IALLOCATOR_MODE_RELOC:
12018       assert self.relocate_from is not None
12019       assert self.required_nodes == 1
12020
12021       node2group = dict((name, ndata["group"])
12022                         for (name, ndata) in self.in_data["nodes"].items())
12023
12024       fn = compat.partial(self._NodesToGroups, node2group,
12025                           self.in_data["nodegroups"])
12026
12027       request_groups = fn(self.relocate_from)
12028       result_groups = fn(rdict["result"])
12029
12030       if result_groups != request_groups:
12031         raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
12032                                  " differ from original groups (%s)" %
12033                                  (utils.CommaJoin(result_groups),
12034                                   utils.CommaJoin(request_groups)))
12035
12036     self.out_data = rdict
12037
12038   @staticmethod
12039   def _NodesToGroups(node2group, groups, nodes):
12040     """Returns a list of unique group names for a list of nodes.
12041
12042     @type node2group: dict
12043     @param node2group: Map from node name to group UUID
12044     @type groups: dict
12045     @param groups: Group information
12046     @type nodes: list
12047     @param nodes: Node names
12048
12049     """
12050     result = set()
12051
12052     for node in nodes:
12053       try:
12054         group_uuid = node2group[node]
12055       except KeyError:
12056         # Ignore unknown node
12057         pass
12058       else:
12059         try:
12060           group = groups[group_uuid]
12061         except KeyError:
12062           # Can't find group, let's use UUID
12063           group_name = group_uuid
12064         else:
12065           group_name = group["name"]
12066
12067         result.add(group_name)
12068
12069     return sorted(result)
12070
12071
12072 class LUTestAllocator(NoHooksLU):
12073   """Run allocator tests.
12074
12075   This LU runs the allocator tests
12076
12077   """
12078   def CheckPrereq(self):
12079     """Check prerequisites.
12080
12081     This checks the opcode parameters depending on the director and mode test.
12082
12083     """
12084     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
12085       for attr in ["mem_size", "disks", "disk_template",
12086                    "os", "tags", "nics", "vcpus"]:
12087         if not hasattr(self.op, attr):
12088           raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
12089                                      attr, errors.ECODE_INVAL)
12090       iname = self.cfg.ExpandInstanceName(self.op.name)
12091       if iname is not None:
12092         raise errors.OpPrereqError("Instance '%s' already in the cluster" %
12093                                    iname, errors.ECODE_EXISTS)
12094       if not isinstance(self.op.nics, list):
12095         raise errors.OpPrereqError("Invalid parameter 'nics'",
12096                                    errors.ECODE_INVAL)
12097       if not isinstance(self.op.disks, list):
12098         raise errors.OpPrereqError("Invalid parameter 'disks'",
12099                                    errors.ECODE_INVAL)
12100       for row in self.op.disks:
12101         if (not isinstance(row, dict) or
12102             "size" not in row or
12103             not isinstance(row["size"], int) or
12104             "mode" not in row or
12105             row["mode"] not in ['r', 'w']):
12106           raise errors.OpPrereqError("Invalid contents of the 'disks'"
12107                                      " parameter", errors.ECODE_INVAL)
12108       if self.op.hypervisor is None:
12109         self.op.hypervisor = self.cfg.GetHypervisorType()
12110     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
12111       fname = _ExpandInstanceName(self.cfg, self.op.name)
12112       self.op.name = fname
12113       self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
12114     elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
12115       if not hasattr(self.op, "evac_nodes"):
12116         raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
12117                                    " opcode input", errors.ECODE_INVAL)
12118     else:
12119       raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
12120                                  self.op.mode, errors.ECODE_INVAL)
12121
12122     if self.op.direction == constants.IALLOCATOR_DIR_OUT:
12123       if self.op.allocator is None:
12124         raise errors.OpPrereqError("Missing allocator name",
12125                                    errors.ECODE_INVAL)
12126     elif self.op.direction != constants.IALLOCATOR_DIR_IN:
12127       raise errors.OpPrereqError("Wrong allocator test '%s'" %
12128                                  self.op.direction, errors.ECODE_INVAL)
12129
12130   def Exec(self, feedback_fn):
12131     """Run the allocator test.
12132
12133     """
12134     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
12135       ial = IAllocator(self.cfg, self.rpc,
12136                        mode=self.op.mode,
12137                        name=self.op.name,
12138                        mem_size=self.op.mem_size,
12139                        disks=self.op.disks,
12140                        disk_template=self.op.disk_template,
12141                        os=self.op.os,
12142                        tags=self.op.tags,
12143                        nics=self.op.nics,
12144                        vcpus=self.op.vcpus,
12145                        hypervisor=self.op.hypervisor,
12146                        )
12147     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
12148       ial = IAllocator(self.cfg, self.rpc,
12149                        mode=self.op.mode,
12150                        name=self.op.name,
12151                        relocate_from=list(self.relocate_from),
12152                        )
12153     elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
12154       ial = IAllocator(self.cfg, self.rpc,
12155                        mode=self.op.mode,
12156                        evac_nodes=self.op.evac_nodes)
12157     else:
12158       raise errors.ProgrammerError("Uncatched mode %s in"
12159                                    " LUTestAllocator.Exec", self.op.mode)
12160
12161     if self.op.direction == constants.IALLOCATOR_DIR_IN:
12162       result = ial.in_text
12163     else:
12164       ial.Run(self.op.allocator, validate=False)
12165       result = ial.out_text
12166     return result
12167
12168
12169 #: Query type implementations
12170 _QUERY_IMPL = {
12171   constants.QR_INSTANCE: _InstanceQuery,
12172   constants.QR_NODE: _NodeQuery,
12173   constants.QR_GROUP: _GroupQuery,
12174   constants.QR_OS: _OsQuery,
12175   }
12176
12177 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
12178
12179
12180 def _GetQueryImplementation(name):
12181   """Returns the implemtnation for a query type.
12182
12183   @param name: Query type, must be one of L{constants.QR_VIA_OP}
12184
12185   """
12186   try:
12187     return _QUERY_IMPL[name]
12188   except KeyError:
12189     raise errors.OpPrereqError("Unknown query resource '%s'" % name,
12190                                errors.ECODE_INVAL)