code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable-msg=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
# C0302: since we have way too many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import platform
  36 import logging
  37 import copy
  38 import OpenSSL
  39 import socket
  40 import tempfile
  41 import shutil
  42 import itertools
  43
  44 from ganeti import ssh
  45 from ganeti import utils
  46 from ganeti import errors
  47 from ganeti import hypervisor
  48 from ganeti import locking
  49 from ganeti import constants
  50 from ganeti import objects
  51 from ganeti import serializer
  52 from ganeti import ssconf
  53 from ganeti import uidpool
  54 from ganeti import compat
  55 from ganeti import masterd
  56 from ganeti import netutils
  57 from ganeti import query
  58 from ganeti import qlang
  59 from ganeti import opcodes
  60
  61 import ganeti.masterd.instance # pylint: disable-msg=W0611
  62
  63
  64 def _SupportsOob(cfg, node):
  65   """Tells if node supports OOB.
  66
  67   @type cfg: L{config.ConfigWriter}
  68   @param cfg: The cluster configuration
  69   @type node: L{objects.Node}
  70   @param node: The node
  71   @return: The OOB script if supported or an empty string otherwise
  72
  73   """
  74   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
  75
  76
  77 class ResultWithJobs:
  78   """Data container for LU results with jobs.
  79
  80   Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  81   by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  82   contained in the C{jobs} attribute and include the job IDs in the opcode
  83   result.
  84
  85   """
  86   def __init__(self, jobs, **kwargs):
  87     """Initializes this class.
  88
  89     Additional return values can be specified as keyword arguments.
  90
  91     @type jobs: list of lists of L{opcode.OpCode}
  92     @param jobs: A list of lists of opcode objects
  93
  94     """
  95     self.jobs = jobs
  96     self.other = kwargs
  97
  98
  99 class LogicalUnit(object):
 100   """Logical Unit base class.
 101
 102   Subclasses must follow these rules:
 103     - implement ExpandNames
 104     - implement CheckPrereq (except when tasklets are used)
 105     - implement Exec (except when tasklets are used)
 106     - implement BuildHooksEnv
 107     - implement BuildHooksNodes
 108     - redefine HPATH and HTYPE
 109     - optionally redefine their run requirements:
 110         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
 111
 112   Note that all commands require root permissions.
 113
 114   @ivar dry_run_result: the value (if any) that will be returned to the caller
 115       in dry-run mode (signalled by opcode dry_run parameter)
 116
 117   """
 118   HPATH = None
 119   HTYPE = None
 120   REQ_BGL = True
 121
 122   def __init__(self, processor, op, context, rpc):
 123     """Constructor for LogicalUnit.
 124
 125     This needs to be overridden in derived classes in order to check op
 126     validity.
 127
 128     """
 129     self.proc = processor
 130     self.op = op
 131     self.cfg = context.cfg
 132     self.glm = context.glm
 133     self.context = context
 134     self.rpc = rpc
 135     # Dicts used to declare locking needs to mcpu
 136     self.needed_locks = None
 137     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 138     self.add_locks = {}
 139     self.remove_locks = {}
 140     # Used to force good behavior when calling helper functions
 141     self.recalculate_locks = {}
 142     # logging
 143     self.Log = processor.Log # pylint: disable-msg=C0103
 144     self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
 145     self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
 146     self.LogStep = processor.LogStep # pylint: disable-msg=C0103
 147     # support for dry-run
 148     self.dry_run_result = None
 149     # support for generic debug attribute
 150     if (not hasattr(self.op, "debug_level") or
 151         not isinstance(self.op.debug_level, int)):
 152       self.op.debug_level = 0
 153
 154     # Tasklets
 155     self.tasklets = None
 156
 157     # Validate opcode parameters and set defaults
 158     self.op.Validate(True)
 159
 160     self.CheckArguments()
 161
 162   def CheckArguments(self):
 163     """Check syntactic validity for the opcode arguments.
 164
 165     This method is for doing a simple syntactic check and ensure
 166     validity of opcode parameters, without any cluster-related
 167     checks. While the same can be accomplished in ExpandNames and/or
 168     CheckPrereq, doing these separate is better because:
 169
 170       - ExpandNames is left as as purely a lock-related function
 171       - CheckPrereq is run after we have acquired locks (and possible
 172         waited for them)
 173
 174     The function is allowed to change the self.op attribute so that
 175     later methods can no longer worry about missing parameters.
 176
 177     """
 178     pass
 179
 180   def ExpandNames(self):
 181     """Expand names for this LU.
 182
 183     This method is called before starting to execute the opcode, and it should
 184     update all the parameters of the opcode to their canonical form (e.g. a
 185     short node name must be fully expanded after this method has successfully
 186     completed). This way locking, hooks, logging, etc. can work correctly.
 187
 188     LUs which implement this method must also populate the self.needed_locks
 189     member, as a dict with lock levels as keys, and a list of needed lock names
 190     as values. Rules:
 191
 192       - use an empty dict if you don't need any lock
 193       - if you don't need any lock at a particular level omit that level
 194       - don't put anything for the BGL level
 195       - if you want all locks at a level use locking.ALL_SET as a value
 196
 197     If you need to share locks (rather than acquire them exclusively) at one
 198     level you can modify self.share_locks, setting a true value (usually 1) for
 199     that level. By default locks are not shared.
 200
 201     This function can also define a list of tasklets, which then will be
 202     executed in order instead of the usual LU-level CheckPrereq and Exec
 203     functions, if those are not defined by the LU.
 204
 205     Examples::
 206
 207       # Acquire all nodes and one instance
 208       self.needed_locks = {
 209         locking.LEVEL_NODE: locking.ALL_SET,
 210         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 211       }
 212       # Acquire just two nodes
 213       self.needed_locks = {
 214         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 215       }
 216       # Acquire no locks
 217       self.needed_locks = {} # No, you can't leave it to the default value None
 218
 219     """
 220     # The implementation of this method is mandatory only if the new LU is
 221     # concurrent, so that old LUs don't need to be changed all at the same
 222     # time.
 223     if self.REQ_BGL:
 224       self.needed_locks = {} # Exclusive LUs don't need locks.
 225     else:
 226       raise NotImplementedError
 227
 228   def DeclareLocks(self, level):
 229     """Declare LU locking needs for a level
 230
 231     While most LUs can just declare their locking needs at ExpandNames time,
 232     sometimes there's the need to calculate some locks after having acquired
 233     the ones before. This function is called just before acquiring locks at a
 234     particular level, but after acquiring the ones at lower levels, and permits
 235     such calculations. It can be used to modify self.needed_locks, and by
 236     default it does nothing.
 237
 238     This function is only called if you have something already set in
 239     self.needed_locks for the level.
 240
 241     @param level: Locking level which is going to be locked
 242     @type level: member of ganeti.locking.LEVELS
 243
 244     """
 245
 246   def CheckPrereq(self):
 247     """Check prerequisites for this LU.
 248
 249     This method should check that the prerequisites for the execution
 250     of this LU are fulfilled. It can do internode communication, but
 251     it should be idempotent - no cluster or system changes are
 252     allowed.
 253
 254     The method should raise errors.OpPrereqError in case something is
 255     not fulfilled. Its return value is ignored.
 256
 257     This method should also update all the parameters of the opcode to
 258     their canonical form if it hasn't been done by ExpandNames before.
 259
 260     """
 261     if self.tasklets is not None:
 262       for (idx, tl) in enumerate(self.tasklets):
 263         logging.debug("Checking prerequisites for tasklet %s/%s",
 264                       idx + 1, len(self.tasklets))
 265         tl.CheckPrereq()
 266     else:
 267       pass
 268
 269   def Exec(self, feedback_fn):
 270     """Execute the LU.
 271
 272     This method should implement the actual work. It should raise
 273     errors.OpExecError for failures that are somewhat dealt with in
 274     code, or expected.
 275
 276     """
 277     if self.tasklets is not None:
 278       for (idx, tl) in enumerate(self.tasklets):
 279         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 280         tl.Exec(feedback_fn)
 281     else:
 282       raise NotImplementedError
 283
 284   def BuildHooksEnv(self):
 285     """Build hooks environment for this LU.
 286
 287     @rtype: dict
 288     @return: Dictionary containing the environment that will be used for
 289       running the hooks for this LU. The keys of the dict must not be prefixed
 290       with "GANETI_"--that'll be added by the hooks runner. The hooks runner
 291       will extend the environment with additional variables. If no environment
 292       should be defined, an empty dictionary should be returned (not C{None}).
 293     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 294       will not be called.
 295
 296     """
 297     raise NotImplementedError
 298
 299   def BuildHooksNodes(self):
 300     """Build list of nodes to run LU's hooks.
 301
 302     @rtype: tuple; (list, list)
 303     @return: Tuple containing a list of node names on which the hook
 304       should run before the execution and a list of node names on which the
 305       hook should run after the execution. No nodes should be returned as an
 306       empty list (and not None).
 307     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 308       will not be called.
 309
 310     """
 311     raise NotImplementedError
 312
 313   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 314     """Notify the LU about the results of its hooks.
 315
 316     This method is called every time a hooks phase is executed, and notifies
 317     the Logical Unit about the hooks' result. The LU can then use it to alter
 318     its result based on the hooks.  By default the method does nothing and the
 319     previous result is passed back unchanged but any LU can define it if it
 320     wants to use the local cluster hook-scripts somehow.
 321
 322     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 323         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 324     @param hook_results: the results of the multi-node hooks rpc call
 325     @param feedback_fn: function used send feedback back to the caller
 326     @param lu_result: the previous Exec result this LU had, or None
 327         in the PRE phase
 328     @return: the new Exec result, based on the previous result
 329         and hook results
 330
 331     """
 332     # API must be kept, thus we ignore the unused argument and could
 333     # be a function warnings
 334     # pylint: disable-msg=W0613,R0201
 335     return lu_result
 336
 337   def _ExpandAndLockInstance(self):
 338     """Helper function to expand and lock an instance.
 339
 340     Many LUs that work on an instance take its name in self.op.instance_name
 341     and need to expand it and then declare the expanded name for locking. This
 342     function does it, and then updates self.op.instance_name to the expanded
 343     name. It also initializes needed_locks as a dict, if this hasn't been done
 344     before.
 345
 346     """
 347     if self.needed_locks is None:
 348       self.needed_locks = {}
 349     else:
 350       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 351         "_ExpandAndLockInstance called with instance-level locks set"
 352     self.op.instance_name = _ExpandInstanceName(self.cfg,
 353                                                 self.op.instance_name)
 354     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 355
 356   def _LockInstancesNodes(self, primary_only=False):
 357     """Helper function to declare instances' nodes for locking.
 358
 359     This function should be called after locking one or more instances to lock
 360     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 361     with all primary or secondary nodes for instances already locked and
 362     present in self.needed_locks[locking.LEVEL_INSTANCE].
 363
 364     It should be called from DeclareLocks, and for safety only works if
 365     self.recalculate_locks[locking.LEVEL_NODE] is set.
 366
 367     In the future it may grow parameters to just lock some instance's nodes, or
 368     to just lock primaries or secondary nodes, if needed.
 369
 370     If should be called in DeclareLocks in a way similar to::
 371
 372       if level == locking.LEVEL_NODE:
 373         self._LockInstancesNodes()
 374
 375     @type primary_only: boolean
 376     @param primary_only: only lock primary nodes of locked instances
 377
 378     """
 379     assert locking.LEVEL_NODE in self.recalculate_locks, \
 380       "_LockInstancesNodes helper function called with no nodes to recalculate"
 381
 382     # TODO: check if we're really been called with the instance locks held
 383
 384     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 385     # future we might want to have different behaviors depending on the value
 386     # of self.recalculate_locks[locking.LEVEL_NODE]
 387     wanted_nodes = []
 388     for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
 389       instance = self.context.cfg.GetInstanceInfo(instance_name)
 390       wanted_nodes.append(instance.primary_node)
 391       if not primary_only:
 392         wanted_nodes.extend(instance.secondary_nodes)
 393
 394     if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
 395       self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
 396     elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
 397       self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
 398
 399     del self.recalculate_locks[locking.LEVEL_NODE]
 400
 401
 402 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
 403   """Simple LU which runs no hooks.
 404
 405   This LU is intended as a parent for other LogicalUnits which will
 406   run no hooks, in order to reduce duplicate code.
 407
 408   """
 409   HPATH = None
 410   HTYPE = None
 411
 412   def BuildHooksEnv(self):
 413     """Empty BuildHooksEnv for NoHooksLu.
 414
 415     This just raises an error.
 416
 417     """
 418     raise AssertionError("BuildHooksEnv called for NoHooksLUs")
 419
 420   def BuildHooksNodes(self):
 421     """Empty BuildHooksNodes for NoHooksLU.
 422
 423     """
 424     raise AssertionError("BuildHooksNodes called for NoHooksLU")
 425
 426
 427 class Tasklet:
 428   """Tasklet base class.
 429
 430   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 431   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 432   tasklets know nothing about locks.
 433
 434   Subclasses must follow these rules:
 435     - Implement CheckPrereq
 436     - Implement Exec
 437
 438   """
 439   def __init__(self, lu):
 440     self.lu = lu
 441
 442     # Shortcuts
 443     self.cfg = lu.cfg
 444     self.rpc = lu.rpc
 445
 446   def CheckPrereq(self):
 447     """Check prerequisites for this tasklets.
 448
 449     This method should check whether the prerequisites for the execution of
 450     this tasklet are fulfilled. It can do internode communication, but it
 451     should be idempotent - no cluster or system changes are allowed.
 452
 453     The method should raise errors.OpPrereqError in case something is not
 454     fulfilled. Its return value is ignored.
 455
 456     This method should also update all parameters to their canonical form if it
 457     hasn't been done before.
 458
 459     """
 460     pass
 461
 462   def Exec(self, feedback_fn):
 463     """Execute the tasklet.
 464
 465     This method should implement the actual work. It should raise
 466     errors.OpExecError for failures that are somewhat dealt with in code, or
 467     expected.
 468
 469     """
 470     raise NotImplementedError
 471
 472
 473 class _QueryBase:
 474   """Base for query utility classes.
 475
 476   """
 477   #: Attribute holding field definitions
 478   FIELDS = None
 479
 480   def __init__(self, filter_, fields, use_locking):
 481     """Initializes this class.
 482
 483     """
 484     self.use_locking = use_locking
 485
 486     self.query = query.Query(self.FIELDS, fields, filter_=filter_,
 487                              namefield="name")
 488     self.requested_data = self.query.RequestedData()
 489     self.names = self.query.RequestedNames()
 490
 491     # Sort only if no names were requested
 492     self.sort_by_name = not self.names
 493
 494     self.do_locking = None
 495     self.wanted = None
 496
 497   def _GetNames(self, lu, all_names, lock_level):
 498     """Helper function to determine names asked for in the query.
 499
 500     """
 501     if self.do_locking:
 502       names = lu.glm.list_owned(lock_level)
 503     else:
 504       names = all_names
 505
 506     if self.wanted == locking.ALL_SET:
 507       assert not self.names
 508       # caller didn't specify names, so ordering is not important
 509       return utils.NiceSort(names)
 510
 511     # caller specified names and we must keep the same order
 512     assert self.names
 513     assert not self.do_locking or lu.glm.is_owned(lock_level)
 514
 515     missing = set(self.wanted).difference(names)
 516     if missing:
 517       raise errors.OpExecError("Some items were removed before retrieving"
 518                                " their data: %s" % missing)
 519
 520     # Return expanded names
 521     return self.wanted
 522
 523   def ExpandNames(self, lu):
 524     """Expand names for this query.
 525
 526     See L{LogicalUnit.ExpandNames}.
 527
 528     """
 529     raise NotImplementedError()
 530
 531   def DeclareLocks(self, lu, level):
 532     """Declare locks for this query.
 533
 534     See L{LogicalUnit.DeclareLocks}.
 535
 536     """
 537     raise NotImplementedError()
 538
 539   def _GetQueryData(self, lu):
 540     """Collects all data for this query.
 541
 542     @return: Query data object
 543
 544     """
 545     raise NotImplementedError()
 546
 547   def NewStyleQuery(self, lu):
 548     """Collect data and execute query.
 549
 550     """
 551     return query.GetQueryResponse(self.query, self._GetQueryData(lu),
 552                                   sort_by_name=self.sort_by_name)
 553
 554   def OldStyleQuery(self, lu):
 555     """Collect data and execute query.
 556
 557     """
 558     return self.query.OldStyleQuery(self._GetQueryData(lu),
 559                                     sort_by_name=self.sort_by_name)
 560
 561
 562 def _GetWantedNodes(lu, nodes):
 563   """Returns list of checked and expanded node names.
 564
 565   @type lu: L{LogicalUnit}
 566   @param lu: the logical unit on whose behalf we execute
 567   @type nodes: list
 568   @param nodes: list of node names or None for all nodes
 569   @rtype: list
 570   @return: the list of nodes, sorted
 571   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 572
 573   """
 574   if nodes:
 575     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 576
 577   return utils.NiceSort(lu.cfg.GetNodeList())
 578
 579
 580 def _GetWantedInstances(lu, instances):
 581   """Returns list of checked and expanded instance names.
 582
 583   @type lu: L{LogicalUnit}
 584   @param lu: the logical unit on whose behalf we execute
 585   @type instances: list
 586   @param instances: list of instance names or None for all instances
 587   @rtype: list
 588   @return: the list of instances, sorted
 589   @raise errors.OpPrereqError: if the instances parameter is wrong type
 590   @raise errors.OpPrereqError: if any of the passed instances is not found
 591
 592   """
 593   if instances:
 594     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 595   else:
 596     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 597   return wanted
 598
 599
 600 def _GetUpdatedParams(old_params, update_dict,
 601                       use_default=True, use_none=False):
 602   """Return the new version of a parameter dictionary.
 603
 604   @type old_params: dict
 605   @param old_params: old parameters
 606   @type update_dict: dict
 607   @param update_dict: dict containing new parameter values, or
 608       constants.VALUE_DEFAULT to reset the parameter to its default
 609       value
 610   @param use_default: boolean
 611   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 612       values as 'to be deleted' values
 613   @param use_none: boolean
 614   @type use_none: whether to recognise C{None} values as 'to be
 615       deleted' values
 616   @rtype: dict
 617   @return: the new parameter dictionary
 618
 619   """
 620   params_copy = copy.deepcopy(old_params)
 621   for key, val in update_dict.iteritems():
 622     if ((use_default and val == constants.VALUE_DEFAULT) or
 623         (use_none and val is None)):
 624       try:
 625         del params_copy[key]
 626       except KeyError:
 627         pass
 628     else:
 629       params_copy[key] = val
 630   return params_copy
 631
 632
 633 def _ReleaseLocks(lu, level, names=None, keep=None):
 634   """Releases locks owned by an LU.
 635
 636   @type lu: L{LogicalUnit}
 637   @param level: Lock level
 638   @type names: list or None
 639   @param names: Names of locks to release
 640   @type keep: list or None
 641   @param keep: Names of locks to retain
 642
 643   """
 644   assert not (keep is not None and names is not None), \
 645          "Only one of the 'names' and the 'keep' parameters can be given"
 646
 647   if names is not None:
 648     should_release = names.__contains__
 649   elif keep:
 650     should_release = lambda name: name not in keep
 651   else:
 652     should_release = None
 653
 654   if should_release:
 655     retain = []
 656     release = []
 657
 658     # Determine which locks to release
 659     for name in lu.glm.list_owned(level):
 660       if should_release(name):
 661         release.append(name)
 662       else:
 663         retain.append(name)
 664
 665     assert len(lu.glm.list_owned(level)) == (len(retain) + len(release))
 666
 667     # Release just some locks
 668     lu.glm.release(level, names=release)
 669
 670     assert frozenset(lu.glm.list_owned(level)) == frozenset(retain)
 671   else:
 672     # Release everything
 673     lu.glm.release(level)
 674
 675     assert not lu.glm.is_owned(level), "No locks should be owned"
 676
 677
 678 def _RunPostHook(lu, node_name):
 679   """Runs the post-hook for an opcode on a single node.
 680
 681   """
 682   hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
 683   try:
 684     hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
 685   except:
 686     # pylint: disable-msg=W0702
 687     lu.LogWarning("Errors occurred running hooks on %s" % node_name)
 688
 689
 690 def _CheckOutputFields(static, dynamic, selected):
 691   """Checks whether all selected fields are valid.
 692
 693   @type static: L{utils.FieldSet}
 694   @param static: static fields set
 695   @type dynamic: L{utils.FieldSet}
 696   @param dynamic: dynamic fields set
 697
 698   """
 699   f = utils.FieldSet()
 700   f.Extend(static)
 701   f.Extend(dynamic)
 702
 703   delta = f.NonMatching(selected)
 704   if delta:
 705     raise errors.OpPrereqError("Unknown output fields selected: %s"
 706                                % ",".join(delta), errors.ECODE_INVAL)
 707
 708
 709 def _CheckGlobalHvParams(params):
 710   """Validates that given hypervisor params are not global ones.
 711
 712   This will ensure that instances don't get customised versions of
 713   global params.
 714
 715   """
 716   used_globals = constants.HVC_GLOBALS.intersection(params)
 717   if used_globals:
 718     msg = ("The following hypervisor parameters are global and cannot"
 719            " be customized at instance level, please modify them at"
 720            " cluster level: %s" % utils.CommaJoin(used_globals))
 721     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 722
 723
 724 def _CheckNodeOnline(lu, node, msg=None):
 725   """Ensure that a given node is online.
 726
 727   @param lu: the LU on behalf of which we make the check
 728   @param node: the node to check
 729   @param msg: if passed, should be a message to replace the default one
 730   @raise errors.OpPrereqError: if the node is offline
 731
 732   """
 733   if msg is None:
 734     msg = "Can't use offline node"
 735   if lu.cfg.GetNodeInfo(node).offline:
 736     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
 737
 738
 739 def _CheckNodeNotDrained(lu, node):
 740   """Ensure that a given node is not drained.
 741
 742   @param lu: the LU on behalf of which we make the check
 743   @param node: the node to check
 744   @raise errors.OpPrereqError: if the node is drained
 745
 746   """
 747   if lu.cfg.GetNodeInfo(node).drained:
 748     raise errors.OpPrereqError("Can't use drained node %s" % node,
 749                                errors.ECODE_STATE)
 750
 751
 752 def _CheckNodeVmCapable(lu, node):
 753   """Ensure that a given node is vm capable.
 754
 755   @param lu: the LU on behalf of which we make the check
 756   @param node: the node to check
 757   @raise errors.OpPrereqError: if the node is not vm capable
 758
 759   """
 760   if not lu.cfg.GetNodeInfo(node).vm_capable:
 761     raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
 762                                errors.ECODE_STATE)
 763
 764
 765 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 766   """Ensure that a node supports a given OS.
 767
 768   @param lu: the LU on behalf of which we make the check
 769   @param node: the node to check
 770   @param os_name: the OS to query about
 771   @param force_variant: whether to ignore variant errors
 772   @raise errors.OpPrereqError: if the node is not supporting the OS
 773
 774   """
 775   result = lu.rpc.call_os_get(node, os_name)
 776   result.Raise("OS '%s' not in supported OS list for node %s" %
 777                (os_name, node),
 778                prereq=True, ecode=errors.ECODE_INVAL)
 779   if not force_variant:
 780     _CheckOSVariant(result.payload, os_name)
 781
 782
 783 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
 784   """Ensure that a node has the given secondary ip.
 785
 786   @type lu: L{LogicalUnit}
 787   @param lu: the LU on behalf of which we make the check
 788   @type node: string
 789   @param node: the node to check
 790   @type secondary_ip: string
 791   @param secondary_ip: the ip to check
 792   @type prereq: boolean
 793   @param prereq: whether to throw a prerequisite or an execute error
 794   @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
 795   @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
 796
 797   """
 798   result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
 799   result.Raise("Failure checking secondary ip on node %s" % node,
 800                prereq=prereq, ecode=errors.ECODE_ENVIRON)
 801   if not result.payload:
 802     msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
 803            " please fix and re-run this command" % secondary_ip)
 804     if prereq:
 805       raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
 806     else:
 807       raise errors.OpExecError(msg)
 808
 809
 810 def _GetClusterDomainSecret():
 811   """Reads the cluster domain secret.
 812
 813   """
 814   return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
 815                                strict=True)
 816
 817
 818 def _CheckInstanceDown(lu, instance, reason):
 819   """Ensure that an instance is not running."""
 820   if instance.admin_up:
 821     raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
 822                                (instance.name, reason), errors.ECODE_STATE)
 823
 824   pnode = instance.primary_node
 825   ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
 826   ins_l.Raise("Can't contact node %s for instance information" % pnode,
 827               prereq=True, ecode=errors.ECODE_ENVIRON)
 828
 829   if instance.name in ins_l.payload:
 830     raise errors.OpPrereqError("Instance %s is running, %s" %
 831                                (instance.name, reason), errors.ECODE_STATE)
 832
 833
 834 def _ExpandItemName(fn, name, kind):
 835   """Expand an item name.
 836
 837   @param fn: the function to use for expansion
 838   @param name: requested item name
 839   @param kind: text description ('Node' or 'Instance')
 840   @return: the resolved (full) name
 841   @raise errors.OpPrereqError: if the item is not found
 842
 843   """
 844   full_name = fn(name)
 845   if full_name is None:
 846     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
 847                                errors.ECODE_NOENT)
 848   return full_name
 849
 850
 851 def _ExpandNodeName(cfg, name):
 852   """Wrapper over L{_ExpandItemName} for nodes."""
 853   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
 854
 855
 856 def _ExpandInstanceName(cfg, name):
 857   """Wrapper over L{_ExpandItemName} for instance."""
 858   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
 859
 860
 861 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 862                           memory, vcpus, nics, disk_template, disks,
 863                           bep, hvp, hypervisor_name):
 864   """Builds instance related env variables for hooks
 865
 866   This builds the hook environment from individual variables.
 867
 868   @type name: string
 869   @param name: the name of the instance
 870   @type primary_node: string
 871   @param primary_node: the name of the instance's primary node
 872   @type secondary_nodes: list
 873   @param secondary_nodes: list of secondary nodes as strings
 874   @type os_type: string
 875   @param os_type: the name of the instance's OS
 876   @type status: boolean
 877   @param status: the should_run status of the instance
 878   @type memory: string
 879   @param memory: the memory size of the instance
 880   @type vcpus: string
 881   @param vcpus: the count of VCPUs the instance has
 882   @type nics: list
 883   @param nics: list of tuples (ip, mac, mode, link) representing
 884       the NICs the instance has
 885   @type disk_template: string
 886   @param disk_template: the disk template of the instance
 887   @type disks: list
 888   @param disks: the list of (size, mode) pairs
 889   @type bep: dict
 890   @param bep: the backend parameters for the instance
 891   @type hvp: dict
 892   @param hvp: the hypervisor parameters for the instance
 893   @type hypervisor_name: string
 894   @param hypervisor_name: the hypervisor for the instance
 895   @rtype: dict
 896   @return: the hook environment for this instance
 897
 898   """
 899   if status:
 900     str_status = "up"
 901   else:
 902     str_status = "down"
 903   env = {
 904     "OP_TARGET": name,
 905     "INSTANCE_NAME": name,
 906     "INSTANCE_PRIMARY": primary_node,
 907     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
 908     "INSTANCE_OS_TYPE": os_type,
 909     "INSTANCE_STATUS": str_status,
 910     "INSTANCE_MEMORY": memory,
 911     "INSTANCE_VCPUS": vcpus,
 912     "INSTANCE_DISK_TEMPLATE": disk_template,
 913     "INSTANCE_HYPERVISOR": hypervisor_name,
 914   }
 915
 916   if nics:
 917     nic_count = len(nics)
 918     for idx, (ip, mac, mode, link) in enumerate(nics):
 919       if ip is None:
 920         ip = ""
 921       env["INSTANCE_NIC%d_IP" % idx] = ip
 922       env["INSTANCE_NIC%d_MAC" % idx] = mac
 923       env["INSTANCE_NIC%d_MODE" % idx] = mode
 924       env["INSTANCE_NIC%d_LINK" % idx] = link
 925       if mode == constants.NIC_MODE_BRIDGED:
 926         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
 927   else:
 928     nic_count = 0
 929
 930   env["INSTANCE_NIC_COUNT"] = nic_count
 931
 932   if disks:
 933     disk_count = len(disks)
 934     for idx, (size, mode) in enumerate(disks):
 935       env["INSTANCE_DISK%d_SIZE" % idx] = size
 936       env["INSTANCE_DISK%d_MODE" % idx] = mode
 937   else:
 938     disk_count = 0
 939
 940   env["INSTANCE_DISK_COUNT"] = disk_count
 941
 942   for source, kind in [(bep, "BE"), (hvp, "HV")]:
 943     for key, value in source.items():
 944       env["INSTANCE_%s_%s" % (kind, key)] = value
 945
 946   return env
 947
 948
 949 def _NICListToTuple(lu, nics):
 950   """Build a list of nic information tuples.
 951
 952   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
 953   value in LUInstanceQueryData.
 954
 955   @type lu:  L{LogicalUnit}
 956   @param lu: the logical unit on whose behalf we execute
 957   @type nics: list of L{objects.NIC}
 958   @param nics: list of nics to convert to hooks tuples
 959
 960   """
 961   hooks_nics = []
 962   cluster = lu.cfg.GetClusterInfo()
 963   for nic in nics:
 964     ip = nic.ip
 965     mac = nic.mac
 966     filled_params = cluster.SimpleFillNIC(nic.nicparams)
 967     mode = filled_params[constants.NIC_MODE]
 968     link = filled_params[constants.NIC_LINK]
 969     hooks_nics.append((ip, mac, mode, link))
 970   return hooks_nics
 971
 972
 973 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
 974   """Builds instance related env variables for hooks from an object.
 975
 976   @type lu: L{LogicalUnit}
 977   @param lu: the logical unit on whose behalf we execute
 978   @type instance: L{objects.Instance}
 979   @param instance: the instance for which we should build the
 980       environment
 981   @type override: dict
 982   @param override: dictionary with key/values that will override
 983       our values
 984   @rtype: dict
 985   @return: the hook environment dictionary
 986
 987   """
 988   cluster = lu.cfg.GetClusterInfo()
 989   bep = cluster.FillBE(instance)
 990   hvp = cluster.FillHV(instance)
 991   args = {
 992     'name': instance.name,
 993     'primary_node': instance.primary_node,
 994     'secondary_nodes': instance.secondary_nodes,
 995     'os_type': instance.os,
 996     'status': instance.admin_up,
 997     'memory': bep[constants.BE_MEMORY],
 998     'vcpus': bep[constants.BE_VCPUS],
 999     'nics': _NICListToTuple(lu, instance.nics),
1000     'disk_template': instance.disk_template,
1001     'disks': [(disk.size, disk.mode) for disk in instance.disks],
1002     'bep': bep,
1003     'hvp': hvp,
1004     'hypervisor_name': instance.hypervisor,
1005   }
1006   if override:
1007     args.update(override)
1008   return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1009
1010
def _AdjustCandidatePool(lu, exceptions):
  """Adjust the candidate pool after node operations.

  Asks the configuration to maintain the candidate pool, logs and
  re-adds (via C{lu.context.ReaddNode}) every node returned by that call,
  and logs a note if there are now more candidates than desired.

  @param lu: the logical unit on whose behalf we execute
  @param exceptions: passed unchanged to C{MaintainCandidatePool} and
      C{GetMasterCandidateStats}

  """
  promoted = lu.cfg.MaintainCandidatePool(exceptions)
  if promoted:
    promoted_names = utils.CommaJoin(node.name for node in promoted)
    lu.LogInfo("Promoted nodes to master candidate role: %s", promoted_names)
    # the list holds node objects (they have a .name), not plain names
    for node in promoted:
      lu.context.ReaddNode(node)

  (mc_now, mc_max, _) = lu.cfg.GetMasterCandidateStats(exceptions)
  if mc_now <= mc_max:
    return
  lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
             (mc_now, mc_max))
1025
1026
def _DecideSelfPromotion(lu, exceptions=None):
  """Decide whether I should promote myself as a master candidate.

  @param lu: the logical unit on whose behalf we execute
  @param exceptions: passed unchanged to C{GetMasterCandidateStats}
  @rtype: boolean
  @return: True if the current number of candidates is below the desired
      number once one more node is accounted for (capped at the
      cluster's candidate pool size)

  """
  pool_size = lu.cfg.GetClusterInfo().candidate_pool_size
  (current, desired, _) = lu.cfg.GetMasterCandidateStats(exceptions)
  # the new node raises the desired count by one, but never above the
  # configured candidate pool size
  target = min(desired + 1, pool_size)
  return current < target
1036
1037
def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  Only NICs whose filled parameters say bridged mode are considered; if
  there are none, no RPC call is made.

  @param lu: the logical unit on whose behalf we execute
  @param target_nics: the NIC objects to check
  @param target_node: the node on which the bridges must exist

  """
  cluster = lu.cfg.GetClusterInfo()
  bridges = []
  for nic in target_nics:
    params = cluster.SimpleFillNIC(nic.nicparams)
    if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      bridges.append(params[constants.NIC_LINK])

  if not bridges:
    return

  result = lu.rpc.call_bridges_exist(target_node, bridges)
  result.Raise("Error checking bridges on destination node '%s'" %
               target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1050
1051
def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose NICs are checked
  @param node: the node to check on; defaults to the instance's primary
      node when None

  """
  if node is not None:
    target = node
  else:
    target = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, target)
1059
1060
def _CheckOSVariant(os_obj, name):
  """Check whether an OS name conforms to the os variants specification.

  Nothing is checked when the OS object declares no supported variants.

  @type os_obj: L{objects.OS}
  @param os_obj: OS object to check
  @type name: string
  @param name: OS name passed by the user, to check for validity
  @raise errors.OpPrereqError: if the name carries no variant, or the
      variant is not among the supported ones

  """
  supported = os_obj.supported_variants
  if supported:
    variant = objects.OS.GetVariant(name)
    if not variant:
      raise errors.OpPrereqError("OS name must include a variant",
                                 errors.ECODE_INVAL)
    if variant not in supported:
      raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1079
1080
def _GetNodeInstancesInner(cfg, fn):
  """Return the configured instances accepted by a filter function.

  @param cfg: configuration object providing C{GetAllInstancesInfo}
  @param fn: callable taking an instance object; instances for which it
      returns a true value are kept
  @rtype: list
  @return: the matching instance objects

  """
  selected = []
  for inst in cfg.GetAllInstancesInfo().values():
    if fn(inst):
      selected.append(inst)
  return selected
1083
1084
def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  An instance is selected when the node name is in its C{all_nodes}.

  """
  def _UsesNode(inst):
    return node_name in inst.all_nodes

  return _GetNodeInstancesInner(cfg, _UsesNode)
1091
1092
def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  An instance is selected when its C{primary_node} equals the node name.

  """
  def _IsPrimaryHere(inst):
    return node_name == inst.primary_node

  return _GetNodeInstancesInner(cfg, _IsPrimaryHere)
1099
1100
def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  An instance is selected when the node name is in its
  C{secondary_nodes}.

  """
  def _IsSecondaryHere(inst):
    return node_name in inst.secondary_nodes

  return _GetNodeInstancesInner(cfg, _IsSecondaryHere)
1107
1108
def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  @param cfg: configuration object, queried for the storage directories
  @param storage_type: the storage type to build arguments for
  @rtype: list
  @return: an empty list, except for file storage where the single
      argument is the list of the two storage directories

  """
  if storage_type != constants.ST_FILE:
    return []

  # Special case for file storage: storage.FileStorage wants a list of
  # storage directories
  storage_dirs = [cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]
  return [storage_dirs]
1119
1120
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  """Return the indices of an instance's disks that a node reports faulty.

  @param cfg: configuration object, used to set the disk IDs for the node
  @param rpc: RPC runner used for the mirror status call
  @param instance: the instance whose disks are examined
  @param node_name: the node to query
  @param prereq: passed as the C{prereq} argument when raising on a
      failed RPC result
  @rtype: list
  @return: indices (positions in the returned status list) of disks
      whose local disk status is C{constants.LDS_FAULTY}

  """
  disks = instance.disks
  for disk in disks:
    cfg.SetDiskID(disk, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  # entries without a status (false values) are skipped
  return [idx for (idx, status) in enumerate(result.payload)
          if status and status.ldisk_status == constants.LDS_FAULTY]
1136
1137
def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
  """Check the sanity of iallocator and node arguments and use the
  cluster-wide iallocator if appropriate.

  Check that at most one of (iallocator, node) is specified. If none is
  specified, then the LU's opcode's iallocator slot is filled with the
  cluster-wide default iallocator.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit whose opcode (C{lu.op}) is inspected and
      possibly updated
  @type iallocator_slot: string
  @param iallocator_slot: the name of the opcode iallocator slot
  @type node_slot: string
  @param node_slot: the name of the opcode target node slot
  @raise errors.OpPrereqError: if both slots are set, or if neither is
      set and the cluster has no default iallocator

  """
  node = getattr(lu.op, node_slot, None)
  iallocator = getattr(lu.op, iallocator_slot, None)

  if node is not None and iallocator is not None:
    raise errors.OpPrereqError("Do not specify both, iallocator and node.",
                               errors.ECODE_INVAL)
  elif node is None and iallocator is None:
    default_iallocator = lu.cfg.GetDefaultIAllocator()
    if default_iallocator:
      setattr(lu.op, iallocator_slot, default_iallocator)
    else:
      # Like the "both given" case above, this is an invalid combination
      # of arguments, so it carries the same explicit error code
      raise errors.OpPrereqError("No iallocator or node given and no"
                                 " cluster-wide default iallocator found."
                                 " Please specify either an iallocator or a"
                                 " node, or set a cluster-wide default"
                                 " iallocator.", errors.ECODE_INVAL)
1168
1169
class LUClusterPostInit(LogicalUnit):
  """Logical unit for running hooks after cluster initialization.

  The unit itself performs no work in L{Exec}; it only provides the
  hooks environment and node lists.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: dict
    @return: environment with the cluster name as C{OP_TARGET}

    """
    cluster_name = self.cfg.GetClusterName()
    return {"OP_TARGET": cluster_name}

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: a pair of lists; the first is empty, the second holds only
        the master node

    """
    master = self.cfg.GetMasterNode()
    return ([], [master])

  def Exec(self, feedback_fn):
    """Nothing to do.

    @return: always True

    """
    return True
1196
1197
class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: dict
    @return: environment with the cluster name as C{OP_TARGET}

    """
    cluster_name = self.cfg.GetClusterName()
    return {"OP_TARGET": cluster_name}

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: a pair of empty lists

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty: the master must be the
    only node left and there must be no instances.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()
    nodelist = self.cfg.GetNodeList()

    only_master_left = (len(nodelist) == 1 and nodelist[0] == master)
    if not only_master_left:
      # the count in the message is the node list length minus one
      extra_nodes = len(nodelist) - 1
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % extra_nodes,
                                 errors.ECODE_INVAL)

    instancelist = self.cfg.GetInstanceList()
    if instancelist:
      num_instances = len(instancelist)
      raise errors.OpPrereqError("There are still %d instance(s) in"
                                 " this cluster." % num_instances,
                                 errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    Runs the post hooks on the master node, then asks the master node
    to stop its master role.

    @return: the master node, as returned by the configuration

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master)

    stop_result = self.rpc.call_node_stop_master(master, False)
    stop_result.Raise("Could not disable the master role")

    return master
1253
1254
1255 def _VerifyCertificate(filename):
1256   """Verifies a certificate for LUClusterVerify.
1257
1258   @type filename: string
1259   @param filename: Path to PEM file
1260
1261   """
1262   try:
1263     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1264                                            utils.ReadFile(filename))
1265   except Exception, err: # pylint: disable-msg=W0703
1266     return (LUClusterVerify.ETYPE_ERROR,
1267             "Failed to load X509 certificate %s: %s" % (filename, err))
1268
1269   (errcode, msg) = \
1270     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1271                                 constants.SSL_CERT_EXPIRATION_ERROR)
1272
1273   if msg:
1274     fnamemsg = "While verifying %s: %s" % (filename, msg)
1275   else:
1276     fnamemsg = None
1277
1278   if errcode is None:
1279     return (None, fnamemsg)
1280   elif errcode == utils.CERT_WARNING:
1281     return (LUClusterVerify.ETYPE_WARNING, fnamemsg)
1282   elif errcode == utils.CERT_ERROR:
1283     return (LUClusterVerify.ETYPE_ERROR, fnamemsg)
1284
1285   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1286
1287
class LUClusterVerify(LogicalUnit):
  """Verifies the cluster status.

  The class-level constants below define the item types and error codes
  used when reporting verification problems.

  """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  # Item types; used as the first element of the error-code tuples below
  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  # Error codes, each a (item type, error code name) pair
  # cluster-level errors
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
  # instance-level errors
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
  # node-level errors
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")

  # ETYPE_FIELD is the keyword argument name through which callers of the
  # error-reporting helpers select the severity; the other two are the
  # possible severities
  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  # matches the (empty) start of every line, thanks to multi-line mode
  _HOOKS_INDENT_RE = re.compile("^", re.M)
1333
1334   class NodeImage(object):
1335     """A class representing the logical and physical status of a node.
1336
1337     @type name: string
1338     @ivar name: the node name to which this object refers
1339     @ivar volumes: a structure as returned from
1340         L{ganeti.backend.GetVolumeList} (runtime)
1341     @ivar instances: a list of running instances (runtime)
1342     @ivar pinst: list of configured primary instances (config)
1343     @ivar sinst: list of configured secondary instances (config)
1344     @ivar sbp: dictionary of {primary-node: list of instances} for all
1345         instances for which this node is secondary (config)
1346     @ivar mfree: free memory, as reported by hypervisor (runtime)
1347     @ivar dfree: free disk, as reported by the node (runtime)
1348     @ivar offline: the offline status (config)
1349     @type rpc_fail: boolean
1350     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1351         not whether the individual keys were correct) (runtime)
1352     @type lvm_fail: boolean
1353     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1354     @type hyp_fail: boolean
1355     @ivar hyp_fail: whether the RPC call didn't return the instance list
1356     @type ghost: boolean
1357     @ivar ghost: whether this is a known node or not (config)
1358     @type os_fail: boolean
1359     @ivar os_fail: whether the RPC call didn't return valid OS data
1360     @type oslist: list
1361     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1362     @type vm_capable: boolean
1363     @ivar vm_capable: whether the node can host instances
1364
1365     """
1366     def __init__(self, offline=False, name=None, vm_capable=True):
1367       self.name = name
1368       self.volumes = {}
1369       self.instances = []
1370       self.pinst = []
1371       self.sinst = []
1372       self.sbp = {}
1373       self.mfree = 0
1374       self.dfree = 0
1375       self.offline = offline
1376       self.vm_capable = vm_capable
1377       self.rpc_fail = False
1378       self.lvm_fail = False
1379       self.hyp_fail = False
1380       self.ghost = False
1381       self.os_fail = False
1382       self.oslist = {}
1383
1384   def ExpandNames(self):
1385     self.needed_locks = {
1386       locking.LEVEL_NODE: locking.ALL_SET,
1387       locking.LEVEL_INSTANCE: locking.ALL_SET,
1388     }
1389     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1390
1391   def _Error(self, ecode, item, msg, *args, **kwargs):
1392     """Format an error message.
1393
1394     Based on the opcode's error_codes parameter, either format a
1395     parseable error code, or a simpler error string.
1396
1397     This must be called only from Exec and functions called from Exec.
1398
1399     """
1400     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1401     itype, etxt = ecode
1402     # first complete the msg
1403     if args:
1404       msg = msg % args
1405     # then format the whole message
1406     if self.op.error_codes:
1407       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1408     else:
1409       if item:
1410         item = " " + item
1411       else:
1412         item = ""
1413       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1414     # and finally report it via the feedback_fn
1415     self._feedback_fn("  - %s" % msg)
1416
1417   def _ErrorIf(self, cond, *args, **kwargs):
1418     """Log an error message if the passed condition is True.
1419
1420     """
1421     cond = bool(cond) or self.op.debug_simulate_errors
1422     if cond:
1423       self._Error(*args, **kwargs)
1424     # do not mark the operation as failed for WARN cases only
1425     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1426       self.bad = self.bad or cond
1427
  def _VerifyNode(self, ninfo, nresult):
    """Perform some basic validation on data returned from a node.

      - check the result data structure is well formed and has all the
        mandatory fields
      - check ganeti version
      - report hypervisor, hypervisor parameter and node setup problems
        found in the result

    @type ninfo: L{objects.Node}
    @param ninfo: the node to check
    @param nresult: the results from the node
    @rtype: boolean
    @return: whether overall this call was successful (and we can expect
         reasonable values in the response)

    """
    node = ninfo.name
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # main result, nresult should be a non-empty dict
    test = not nresult or not isinstance(nresult, dict)
    _ErrorIf(test, self.ENODERPC, node,
                  "unable to verify node: no data returned")
    if test:
      return False

    # compares ganeti version
    local_version = constants.PROTOCOL_VERSION
    remote_version = nresult.get("version", None)
    # the version must be a two-element sequence; element 0 is compared
    # with the protocol version, element 1 with the release version
    test = not (remote_version and
                isinstance(remote_version, (list, tuple)) and
                len(remote_version) == 2)
    _ErrorIf(test, self.ENODERPC, node,
             "connection to node returned invalid data")
    if test:
      return False

    # a protocol mismatch is fatal for the rest of the checks
    test = local_version != remote_version[0]
    _ErrorIf(test, self.ENODEVERSION, node,
             "incompatible protocol versions: master %s,"
             " node %s", local_version, remote_version[0])
    if test:
      return False

    # node seems compatible, we can actually try to look into its results

    # full package version; a mismatch here is only a warning
    self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
                  self.ENODEVERSION, node,
                  "software version mismatch: master %s, node %s",
                  constants.RELEASE_VERSION, remote_version[1],
                  code=self.ETYPE_WARNING)

    # per-hypervisor verification results; any value other than None is
    # reported as a failure
    hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
    if ninfo.vm_capable and isinstance(hyp_result, dict):
      for hv_name, hv_result in hyp_result.iteritems():
        test = hv_result is not None
        _ErrorIf(test, self.ENODEHV, node,
                 "hypervisor %s verify failure: '%s'", hv_name, hv_result)

    # hypervisor parameter verification; every entry in the list is
    # reported unconditionally
    hvp_result = nresult.get(constants.NV_HVPARAMS, None)
    if ninfo.vm_capable and isinstance(hvp_result, list):
      for item, hv_name, hv_result in hvp_result:
        _ErrorIf(True, self.ENODEHV, node,
                 "hypervisor %s parameter verify failure (source %s): %s",
                 hv_name, item, hv_result)

    # node setup problems; a missing key is itself reported through the
    # default value
    test = nresult.get(constants.NV_NODESETUP,
                       ["Missing NODESETUP results"])
    _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
             "; ".join(test))

    return True
1500
1501   def _VerifyNodeTime(self, ninfo, nresult,
1502                       nvinfo_starttime, nvinfo_endtime):
1503     """Check the node time.
1504
1505     @type ninfo: L{objects.Node}
1506     @param ninfo: the node to check
1507     @param nresult: the remote results for the node
1508     @param nvinfo_starttime: the start time of the RPC call
1509     @param nvinfo_endtime: the end time of the RPC call
1510
1511     """
1512     node = ninfo.name
1513     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1514
1515     ntime = nresult.get(constants.NV_TIME, None)
1516     try:
1517       ntime_merged = utils.MergeTime(ntime)
1518     except (ValueError, TypeError):
1519       _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1520       return
1521
1522     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1523       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1524     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1525       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1526     else:
1527       ntime_diff = None
1528
1529     _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1530              "Node time diverges by at least %s from master node time",
1531              ntime_diff)
1532
1533   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1534     """Check the node time.
1535
1536     @type ninfo: L{objects.Node}
1537     @param ninfo: the node to check
1538     @param nresult: the remote results for the node
1539     @param vg_name: the configured VG name
1540
1541     """
1542     if vg_name is None:
1543       return
1544
1545     node = ninfo.name
1546     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1547
1548     # checks vg existence and size > 20G
1549     vglist = nresult.get(constants.NV_VGLIST, None)
1550     test = not vglist
1551     _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1552     if not test:
1553       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1554                                             constants.MIN_VG_SIZE)
1555       _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1556
1557     # check pv names
1558     pvlist = nresult.get(constants.NV_PVLIST, None)
1559     test = pvlist is None
1560     _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1561     if not test:
1562       # check that ':' is not present in PV names, since it's a
1563       # special character for lvcreate (denotes the range of PEs to
1564       # use on the PV)
1565       for _, pvname, owner_vg in pvlist:
1566         test = ":" in pvname
1567         _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1568                  " '%s' of VG '%s'", pvname, owner_vg)
1569
1570   def _VerifyNodeNetwork(self, ninfo, nresult):
1571     """Check the node time.
1572
1573     @type ninfo: L{objects.Node}
1574     @param ninfo: the node to check
1575     @param nresult: the remote results for the node
1576
1577     """
1578     node = ninfo.name
1579     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1580
1581     test = constants.NV_NODELIST not in nresult
1582     _ErrorIf(test, self.ENODESSH, node,
1583              "node hasn't returned node ssh connectivity data")
1584     if not test:
1585       if nresult[constants.NV_NODELIST]:
1586         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1587           _ErrorIf(True, self.ENODESSH, node,
1588                    "ssh communication with node '%s': %s", a_node, a_msg)
1589
1590     test = constants.NV_NODENETTEST not in nresult
1591     _ErrorIf(test, self.ENODENET, node,
1592              "node hasn't returned node tcp connectivity data")
1593     if not test:
1594       if nresult[constants.NV_NODENETTEST]:
1595         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1596         for anode in nlist:
1597           _ErrorIf(True, self.ENODENET, node,
1598                    "tcp communication with node '%s': %s",
1599                    anode, nresult[constants.NV_NODENETTEST][anode])
1600
1601     test = constants.NV_MASTERIP not in nresult
1602     _ErrorIf(test, self.ENODENET, node,
1603              "node hasn't returned node master IP reachability data")
1604     if not test:
1605       if not nresult[constants.NV_MASTERIP]:
1606         if node == self.master_node:
1607           msg = "the master node cannot reach the master IP (not configured?)"
1608         else:
1609           msg = "cannot reach the master IP"
1610         _ErrorIf(True, self.ENODENET, node, msg)
1611
  def _VerifyInstance(self, instance, instanceconfig, node_image,
                      diskstatus):
    """Verify an instance.

    This function checks to see if the required block devices are
    available on the instance's node. It also checks that an instance
    marked as up is running on its primary node, that it is not running
    on any other node, and that its disks are not reported faulty.

    @param instance: the instance identifier used for reporting and for
        lookups in the nodes' running-instance lists (presumably the
        instance name)
    @param instanceconfig: the instance's configuration object
    @param node_image: mapping of node name to that node's image object
    @param diskstatus: mapping of node name to a list of
        (success, status) pairs, one per disk

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
    node_current = instanceconfig.primary_node

    # filled by MapLVsByNode; iterated below as node -> volumes
    node_vol_should = {}
    instanceconfig.MapLVsByNode(node_vol_should)

    for node in node_vol_should:
      n_img = node_image[node]
      if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
        # ignore missing volumes on offline or broken nodes
        continue
      for volume in node_vol_should[node]:
        test = volume not in n_img.volumes
        _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
                 "volume %s missing on node %s", volume, node)

    if instanceconfig.admin_up:
      pri_img = node_image[node_current]
      # an offline primary node is not blamed for a missing instance
      test = instance not in pri_img.instances and not pri_img.offline
      _ErrorIf(test, self.EINSTANCEDOWN, instance,
               "instance not running on its primary node %s",
               node_current)

    # the instance must not be running anywhere but on its primary node
    for node, n_img in node_image.items():
      if node != node_current:
        test = instance in n_img.instances
        _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
                 "instance should not run on node %s", node)

    # flatten the per-node disk status lists, keeping the disk index
    diskdata = [(nname, success, status, idx)
                for (nname, disks) in diskstatus.items()
                for idx, (success, status) in enumerate(disks)]

    for nname, success, bdev_status, idx in diskdata:
      # the 'ghost node' construction in Exec() ensures that we have a
      # node here
      snode = node_image[nname]
      bad_snode = snode.ghost or snode.offline
      # failed status retrieval only counts for instances marked up and
      # nodes that are neither ghost nor offline
      _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
               self.EINSTANCEFAULTYDISK, instance,
               "couldn't retrieve status for disk/%s on %s: %s",
               idx, nname, bdev_status)
      # bdev_status is only dereferenced when the retrieval succeeded
      _ErrorIf((instanceconfig.admin_up and success and
                bdev_status.ldisk_status == constants.LDS_FAULTY),
               self.EINSTANCEFAULTYDISK, instance,
               "disk/%s on %s is faulty", idx, nname)
1666
1667   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1668     """Verify if there are any unknown volumes in the cluster.
1669
1670     The .os, .swap and backup volumes are ignored. All other volumes are
1671     reported as unknown.
1672
1673     @type reserved: L{ganeti.utils.FieldSet}
1674     @param reserved: a FieldSet of reserved volume names
1675
1676     """
1677     for node, n_img in node_image.items():
1678       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1679         # skip non-healthy nodes
1680         continue
1681       for volume in n_img.volumes:
1682         test = ((node not in node_vol_should or
1683                 volume not in node_vol_should[node]) and
1684                 not reserved.Matches(volume))
1685         self._ErrorIf(test, self.ENODEORPHANLV, node,
1686                       "volume %s is unknown", volume)
1687
1688   def _VerifyOrphanInstances(self, instancelist, node_image):
1689     """Verify the list of running instances.
1690
1691     This checks what instances are running but unknown to the cluster.
1692
1693     """
1694     for node, n_img in node_image.items():
1695       for o_inst in n_img.instances:
1696         test = o_inst not in instancelist
1697         self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1698                       "instance %s on node %s should not exist", o_inst, node)
1699
1700   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1701     """Verify N+1 Memory Resilience.
1702
1703     Check that if one single node dies we can still start all the
1704     instances it was primary for.
1705
1706     """
1707     cluster_info = self.cfg.GetClusterInfo()
1708     for node, n_img in node_image.items():
1709       # This code checks that every node which is now listed as
1710       # secondary has enough memory to host all instances it is
1711       # supposed to should a single other node in the cluster fail.
1712       # FIXME: not ready for failover to an arbitrary node
1713       # FIXME: does not support file-backed instances
1714       # WARNING: we currently take into account down instances as well
1715       # as up ones, considering that even if they're down someone
1716       # might want to start them even in the event of a node failure.
1717       if n_img.offline:
1718         # we're skipping offline nodes from the N+1 warning, since
1719         # most likely we don't have good memory infromation from them;
1720         # we already list instances living on such nodes, and that's
1721         # enough warning
1722         continue
1723       for prinode, instances in n_img.sbp.items():
1724         needed_mem = 0
1725         for instance in instances:
1726           bep = cluster_info.FillBE(instance_cfg[instance])
1727           if bep[constants.BE_AUTO_BALANCE]:
1728             needed_mem += bep[constants.BE_MEMORY]
1729         test = n_img.mfree < needed_mem
1730         self._ErrorIf(test, self.ENODEN1, node,
1731                       "not enough memory to accomodate instance failovers"
1732                       " should node %s fail (%dMiB needed, %dMiB available)",
1733                       prinode, needed_mem, n_img.mfree)
1734
1735   @classmethod
1736   def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
1737                    (files_all, files_all_opt, files_mc, files_vm)):
1738     """Verifies file checksums collected from all nodes.
1739
1740     @param errorif: Callback for reporting errors
1741     @param nodeinfo: List of L{objects.Node} objects
1742     @param master_node: Name of master node
1743     @param all_nvinfo: RPC results
1744
1745     """
1746     node_names = frozenset(node.name for node in nodeinfo)
1747
1748     assert master_node in node_names
1749     assert (len(files_all | files_all_opt | files_mc | files_vm) ==
1750             sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
1751            "Found file listed in more than one file list"
1752
1753     # Define functions determining which nodes to consider for a file
1754     file2nodefn = dict([(filename, fn)
1755       for (files, fn) in [(files_all, None),
1756                           (files_all_opt, None),
1757                           (files_mc, lambda node: (node.master_candidate or
1758                                                    node.name == master_node)),
1759                           (files_vm, lambda node: node.vm_capable)]
1760       for filename in files])
1761
1762     fileinfo = dict((filename, {}) for filename in file2nodefn.keys())
1763
1764     for node in nodeinfo:
1765       nresult = all_nvinfo[node.name]
1766
1767       if nresult.fail_msg or not nresult.payload:
1768         node_files = None
1769       else:
1770         node_files = nresult.payload.get(constants.NV_FILELIST, None)
1771
1772       test = not (node_files and isinstance(node_files, dict))
1773       errorif(test, cls.ENODEFILECHECK, node.name,
1774               "Node did not return file checksum data")
1775       if test:
1776         continue
1777
1778       for (filename, checksum) in node_files.items():
1779         # Check if the file should be considered for a node
1780         fn = file2nodefn[filename]
1781         if fn is None or fn(node):
1782           fileinfo[filename].setdefault(checksum, set()).add(node.name)
1783
1784     for (filename, checksums) in fileinfo.items():
1785       assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
1786
1787       # Nodes having the file
1788       with_file = frozenset(node_name
1789                             for nodes in fileinfo[filename].values()
1790                             for node_name in nodes)
1791
1792       # Nodes missing file
1793       missing_file = node_names - with_file
1794
1795       if filename in files_all_opt:
1796         # All or no nodes
1797         errorif(missing_file and missing_file != node_names,
1798                 cls.ECLUSTERFILECHECK, None,
1799                 "File %s is optional, but it must exist on all or no nodes (not"
1800                 " found on %s)",
1801                 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
1802       else:
1803         errorif(missing_file, cls.ECLUSTERFILECHECK, None,
1804                 "File %s is missing from node(s) %s", filename,
1805                 utils.CommaJoin(utils.NiceSort(missing_file)))
1806
1807       # See if there are multiple versions of the file
1808       test = len(checksums) > 1
1809       if test:
1810         variants = ["variant %s on %s" %
1811                     (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
1812                     for (idx, (checksum, nodes)) in
1813                       enumerate(sorted(checksums.items()))]
1814       else:
1815         variants = []
1816
1817       errorif(test, cls.ECLUSTERFILECHECK, None,
1818               "File %s found with %s different checksums (%s)",
1819               filename, len(checksums), "; ".join(variants))
1820
1821   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1822                       drbd_map):
1823     """Verifies and the node DRBD status.
1824
1825     @type ninfo: L{objects.Node}
1826     @param ninfo: the node to check
1827     @param nresult: the remote results for the node
1828     @param instanceinfo: the dict of instances
1829     @param drbd_helper: the configured DRBD usermode helper
1830     @param drbd_map: the DRBD map as returned by
1831         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1832
1833     """
1834     node = ninfo.name
1835     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1836
1837     if drbd_helper:
1838       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1839       test = (helper_result == None)
1840       _ErrorIf(test, self.ENODEDRBDHELPER, node,
1841                "no drbd usermode helper returned")
1842       if helper_result:
1843         status, payload = helper_result
1844         test = not status
1845         _ErrorIf(test, self.ENODEDRBDHELPER, node,
1846                  "drbd usermode helper check unsuccessful: %s", payload)
1847         test = status and (payload != drbd_helper)
1848         _ErrorIf(test, self.ENODEDRBDHELPER, node,
1849                  "wrong drbd usermode helper: %s", payload)
1850
1851     # compute the DRBD minors
1852     node_drbd = {}
1853     for minor, instance in drbd_map[node].items():
1854       test = instance not in instanceinfo
1855       _ErrorIf(test, self.ECLUSTERCFG, None,
1856                "ghost instance '%s' in temporary DRBD map", instance)
1857         # ghost instance should not be running, but otherwise we
1858         # don't give double warnings (both ghost instance and
1859         # unallocated minor in use)
1860       if test:
1861         node_drbd[minor] = (instance, False)
1862       else:
1863         instance = instanceinfo[instance]
1864         node_drbd[minor] = (instance.name, instance.admin_up)
1865
1866     # and now check them
1867     used_minors = nresult.get(constants.NV_DRBDLIST, [])
1868     test = not isinstance(used_minors, (tuple, list))
1869     _ErrorIf(test, self.ENODEDRBD, node,
1870              "cannot parse drbd status file: %s", str(used_minors))
1871     if test:
1872       # we cannot check drbd status
1873       return
1874
1875     for minor, (iname, must_exist) in node_drbd.items():
1876       test = minor not in used_minors and must_exist
1877       _ErrorIf(test, self.ENODEDRBD, node,
1878                "drbd minor %d of instance %s is not active", minor, iname)
1879     for minor in used_minors:
1880       test = minor not in node_drbd
1881       _ErrorIf(test, self.ENODEDRBD, node,
1882                "unallocated drbd minor %d is in use", minor)
1883
1884   def _UpdateNodeOS(self, ninfo, nresult, nimg):
1885     """Builds the node OS structures.
1886
1887     @type ninfo: L{objects.Node}
1888     @param ninfo: the node to check
1889     @param nresult: the remote results for the node
1890     @param nimg: the node image object
1891
1892     """
1893     node = ninfo.name
1894     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1895
1896     remote_os = nresult.get(constants.NV_OSLIST, None)
1897     test = (not isinstance(remote_os, list) or
1898             not compat.all(isinstance(v, list) and len(v) == 7
1899                            for v in remote_os))
1900
1901     _ErrorIf(test, self.ENODEOS, node,
1902              "node hasn't returned valid OS data")
1903
1904     nimg.os_fail = test
1905
1906     if test:
1907       return
1908
1909     os_dict = {}
1910
1911     for (name, os_path, status, diagnose,
1912          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1913
1914       if name not in os_dict:
1915         os_dict[name] = []
1916
1917       # parameters is a list of lists instead of list of tuples due to
1918       # JSON lacking a real tuple type, fix it:
1919       parameters = [tuple(v) for v in parameters]
1920       os_dict[name].append((os_path, status, diagnose,
1921                             set(variants), set(parameters), set(api_ver)))
1922
1923     nimg.oslist = os_dict
1924
1925   def _VerifyNodeOS(self, ninfo, nimg, base):
1926     """Verifies the node OS list.
1927
1928     @type ninfo: L{objects.Node}
1929     @param ninfo: the node to check
1930     @param nimg: the node image object
1931     @param base: the 'template' node we match against (e.g. from the master)
1932
1933     """
1934     node = ninfo.name
1935     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1936
1937     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1938
1939     beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
1940     for os_name, os_data in nimg.oslist.items():
1941       assert os_data, "Empty OS status for OS %s?!" % os_name
1942       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1943       _ErrorIf(not f_status, self.ENODEOS, node,
1944                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1945       _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1946                "OS '%s' has multiple entries (first one shadows the rest): %s",
1947                os_name, utils.CommaJoin([v[0] for v in os_data]))
1948       # this will catched in backend too
1949       _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1950                and not f_var, self.ENODEOS, node,
1951                "OS %s with API at least %d does not declare any variant",
1952                os_name, constants.OS_API_V15)
1953       # comparisons with the 'base' image
1954       test = os_name not in base.oslist
1955       _ErrorIf(test, self.ENODEOS, node,
1956                "Extra OS %s not present on reference node (%s)",
1957                os_name, base.name)
1958       if test:
1959         continue
1960       assert base.oslist[os_name], "Base node has empty OS status?"
1961       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1962       if not b_status:
1963         # base OS is invalid, skipping
1964         continue
1965       for kind, a, b in [("API version", f_api, b_api),
1966                          ("variants list", f_var, b_var),
1967                          ("parameters", beautify_params(f_param),
1968                           beautify_params(b_param))]:
1969         _ErrorIf(a != b, self.ENODEOS, node,
1970                  "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
1971                  kind, os_name, base.name,
1972                  utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
1973
1974     # check any missing OSes
1975     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1976     _ErrorIf(missing, self.ENODEOS, node,
1977              "OSes present on reference node %s but missing on this node: %s",
1978              base.name, utils.CommaJoin(missing))
1979
1980   def _VerifyOob(self, ninfo, nresult):
1981     """Verifies out of band functionality of a node.
1982
1983     @type ninfo: L{objects.Node}
1984     @param ninfo: the node to check
1985     @param nresult: the remote results for the node
1986
1987     """
1988     node = ninfo.name
1989     # We just have to verify the paths on master and/or master candidates
1990     # as the oob helper is invoked on the master
1991     if ((ninfo.master_candidate or ninfo.master_capable) and
1992         constants.NV_OOB_PATHS in nresult):
1993       for path_result in nresult[constants.NV_OOB_PATHS]:
1994         self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
1995
1996   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1997     """Verifies and updates the node volume data.
1998
1999     This function will update a L{NodeImage}'s internal structures
2000     with data from the remote call.
2001
2002     @type ninfo: L{objects.Node}
2003     @param ninfo: the node to check
2004     @param nresult: the remote results for the node
2005     @param nimg: the node image object
2006     @param vg_name: the configured VG name
2007
2008     """
2009     node = ninfo.name
2010     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2011
2012     nimg.lvm_fail = True
2013     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2014     if vg_name is None:
2015       pass
2016     elif isinstance(lvdata, basestring):
2017       _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2018                utils.SafeEncode(lvdata))
2019     elif not isinstance(lvdata, dict):
2020       _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2021     else:
2022       nimg.volumes = lvdata
2023       nimg.lvm_fail = False
2024
2025   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2026     """Verifies and updates the node instance list.
2027
2028     If the listing was successful, then updates this node's instance
2029     list. Otherwise, it marks the RPC call as failed for the instance
2030     list key.
2031
2032     @type ninfo: L{objects.Node}
2033     @param ninfo: the node to check
2034     @param nresult: the remote results for the node
2035     @param nimg: the node image object
2036
2037     """
2038     idata = nresult.get(constants.NV_INSTANCELIST, None)
2039     test = not isinstance(idata, list)
2040     self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2041                   " (instancelist): %s", utils.SafeEncode(str(idata)))
2042     if test:
2043       nimg.hyp_fail = True
2044     else:
2045       nimg.instances = idata
2046
2047   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2048     """Verifies and computes a node information map
2049
2050     @type ninfo: L{objects.Node}
2051     @param ninfo: the node to check
2052     @param nresult: the remote results for the node
2053     @param nimg: the node image object
2054     @param vg_name: the configured VG name
2055
2056     """
2057     node = ninfo.name
2058     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2059
2060     # try to read free memory (from the hypervisor)
2061     hv_info = nresult.get(constants.NV_HVINFO, None)
2062     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2063     _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2064     if not test:
2065       try:
2066         nimg.mfree = int(hv_info["memory_free"])
2067       except (ValueError, TypeError):
2068         _ErrorIf(True, self.ENODERPC, node,
2069                  "node returned invalid nodeinfo, check hypervisor")
2070
2071     # FIXME: devise a free space model for file based instances as well
2072     if vg_name is not None:
2073       test = (constants.NV_VGLIST not in nresult or
2074               vg_name not in nresult[constants.NV_VGLIST])
2075       _ErrorIf(test, self.ENODELVM, node,
2076                "node didn't return data for the volume group '%s'"
2077                " - it is either missing or broken", vg_name)
2078       if not test:
2079         try:
2080           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2081         except (ValueError, TypeError):
2082           _ErrorIf(True, self.ENODERPC, node,
2083                    "node returned invalid LVM info, check LVM status")
2084
  def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
    """Gets per-disk status information for all instances.

    For every node the disks of its primary and secondary instances are
    collected and their mirror status is queried with a single multi-node
    RPC call. The per-node answers are then regrouped per instance.

    @type nodelist: list of strings
    @param nodelist: Node names
    @type node_image: dict of (name, L{NodeImage})
    @param node_image: Node image objects; their C{pinst} and C{sinst}
        lists tell which instances to look at on each node
    @type instanceinfo: dict of (name, L{objects.Instance})
    @param instanceinfo: Instance objects
    @rtype: {instance: {node: [(success, payload)]}}
    @return: a dictionary of per-instance dictionaries with nodes as
        keys and disk information as values; the disk information is a
        list of tuples (success, payload)

    """
    _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103

    # node name -> list of (instance name, disk object)
    node_disks = {}
    # node name -> copies of the disk objects only, as sent to the node
    node_disks_devonly = {}
    diskless_instances = set()
    diskless = constants.DT_DISKLESS

    for nname in nodelist:
      node_instances = list(itertools.chain(node_image[nname].pinst,
                                            node_image[nname].sinst))
      diskless_instances.update(inst for inst in node_instances
                                if instanceinfo[inst].disk_template == diskless)
      disks = [(inst, disk)
               for inst in node_instances
               for disk in instanceinfo[inst].disks]

      if not disks:
        # No need to collect data
        continue

      node_disks[nname] = disks

      # Creating copies as SetDiskID below will modify the objects and that can
      # lead to incorrect data returned from nodes
      devonly = [dev.Copy() for (_, dev) in disks]

      for dev in devonly:
        self.cfg.SetDiskID(dev, nname)

      node_disks_devonly[nname] = devonly

    assert len(node_disks) == len(node_disks_devonly)

    # Collect data from all nodes with disks
    result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
                                                          node_disks_devonly)

    assert len(result) == len(node_disks)

    instdisk = {}

    for (nname, nres) in result.items():
      disks = node_disks[nname]

      # "data" gets one (success, payload) entry per disk of this node; on
      # failures the same placeholder entry is used for all of them
      if nres.offline:
        # No data from this node
        data = len(disks) * [(False, "node offline")]
      else:
        msg = nres.fail_msg
        _ErrorIf(msg, self.ENODERPC, nname,
                 "while getting disk information: %s", msg)
        if msg:
          # No data from this node
          data = len(disks) * [(False, msg)]
        else:
          data = []
          for idx, i in enumerate(nres.payload):
            if isinstance(i, (tuple, list)) and len(i) == 2:
              data.append(i)
            else:
              # malformed entries are logged and replaced by a failure
              # entry, so that the positions still line up with the disks
              logging.warning("Invalid result from node %s, entry %d: %s",
                              nname, idx, i)
              data.append((False, "Invalid result from the remote node"))

      # Statuses are matched to disks by position; zip() stops at the
      # shorter of the two sequences
      for ((inst, _), status) in zip(disks, data):
        instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)

    # Add empty entries for diskless instances.
    for inst in diskless_instances:
      assert inst not in instdisk
      instdisk[inst] = {}

    # Sanity checks: one status per configured disk, no more nodes than the
    # instance has, and all statuses being two-element sequences
    assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
                      len(nnames) <= len(instanceinfo[inst].all_nodes) and
                      compat.all(isinstance(s, (tuple, list)) and
                                 len(s) == 2 for s in statuses)
                      for inst, nnames in instdisk.items()
                      for nname, statuses in nnames.items())
    assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"

    return instdisk
2181
2182   def _VerifyHVP(self, hvp_data):
2183     """Verifies locally the syntax of the hypervisor parameters.
2184
2185     """
2186     for item, hv_name, hv_params in hvp_data:
2187       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2188              (item, hv_name))
2189       try:
2190         hv_class = hypervisor.GetHypervisor(hv_name)
2191         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2192         hv_class.CheckParameterSyntax(hv_params)
2193       except errors.GenericError, err:
2194         self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
2195
2196   def BuildHooksEnv(self):
2197     """Build hooks env.
2198
2199     Cluster-Verify hooks just ran in the post phase and their failure makes
2200     the output be logged in the verify output and the verification to fail.
2201
2202     """
2203     cfg = self.cfg
2204
2205     env = {
2206       "CLUSTER_TAGS": " ".join(cfg.GetClusterInfo().GetTags())
2207       }
2208
2209     env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2210                for node in cfg.GetAllNodesInfo().values())
2211
2212     return env
2213
2214   def BuildHooksNodes(self):
2215     """Build hooks nodes.
2216
2217     """
2218     return ([], self.cfg.GetNodeList())
2219
2220   def Exec(self, feedback_fn):
2221     """Verify integrity of cluster, performing various test on nodes.
2222
2223     """
2224     # This method has too many local variables. pylint: disable-msg=R0914
2225     self.bad = False
2226     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2227     verbose = self.op.verbose
2228     self._feedback_fn = feedback_fn
2229     feedback_fn("* Verifying global settings")
2230     for msg in self.cfg.VerifyConfig():
2231       _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2232
2233     # Check the cluster certificates
2234     for cert_filename in constants.ALL_CERT_FILES:
2235       (errcode, msg) = _VerifyCertificate(cert_filename)
2236       _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2237
2238     vg_name = self.cfg.GetVGName()
2239     drbd_helper = self.cfg.GetDRBDHelper()
2240     hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2241     cluster = self.cfg.GetClusterInfo()
2242     nodelist = utils.NiceSort(self.cfg.GetNodeList())
2243     nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2244     nodeinfo_byname = dict(zip(nodelist, nodeinfo))
2245     instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2246     instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2247                         for iname in instancelist)
2248     groupinfo = self.cfg.GetAllNodeGroupsInfo()
2249     i_non_redundant = [] # Non redundant instances
2250     i_non_a_balanced = [] # Non auto-balanced instances
2251     n_offline = 0 # Count of offline nodes
2252     n_drained = 0 # Count of nodes being drained
2253     node_vol_should = {}
2254
2255     # FIXME: verify OS list
2256
2257     # File verification
2258     filemap = _ComputeAncillaryFiles(cluster, False)
2259
2260     # do local checksums
2261     master_node = self.master_node = self.cfg.GetMasterNode()
2262     master_ip = self.cfg.GetMasterIP()
2263
2264     # Compute the set of hypervisor parameters
2265     hvp_data = []
2266     for hv_name in hypervisors:
2267       hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
2268     for os_name, os_hvp in cluster.os_hvp.items():
2269       for hv_name, hv_params in os_hvp.items():
2270         if not hv_params:
2271           continue
2272         full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
2273         hvp_data.append(("os %s" % os_name, hv_name, full_params))
2274     # TODO: collapse identical parameter values in a single one
2275     for instance in instanceinfo.values():
2276       if not instance.hvparams:
2277         continue
2278       hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
2279                        cluster.FillHV(instance)))
2280     # and verify them locally
2281     self._VerifyHVP(hvp_data)
2282
2283     feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2284     node_verify_param = {
2285       constants.NV_FILELIST:
2286         utils.UniqueSequence(filename
2287                              for files in filemap
2288                              for filename in files),
2289       constants.NV_NODELIST: [node.name for node in nodeinfo
2290                               if not node.offline],
2291       constants.NV_HYPERVISOR: hypervisors,
2292       constants.NV_HVPARAMS: hvp_data,
2293       constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2294                                   node.secondary_ip) for node in nodeinfo
2295                                  if not node.offline],
2296       constants.NV_INSTANCELIST: hypervisors,
2297       constants.NV_VERSION: None,
2298       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2299       constants.NV_NODESETUP: None,
2300       constants.NV_TIME: None,
2301       constants.NV_MASTERIP: (master_node, master_ip),
2302       constants.NV_OSLIST: None,
2303       constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2304       }
2305
2306     if vg_name is not None:
2307       node_verify_param[constants.NV_VGLIST] = None
2308       node_verify_param[constants.NV_LVLIST] = vg_name
2309       node_verify_param[constants.NV_PVLIST] = [vg_name]
2310       node_verify_param[constants.NV_DRBDLIST] = None
2311
2312     if drbd_helper:
2313       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2314
2315     # Build our expected cluster state
2316     node_image = dict((node.name, self.NodeImage(offline=node.offline,
2317                                                  name=node.name,
2318                                                  vm_capable=node.vm_capable))
2319                       for node in nodeinfo)
2320
2321     # Gather OOB paths
2322     oob_paths = []
2323     for node in nodeinfo:
2324       path = _SupportsOob(self.cfg, node)
2325       if path and path not in oob_paths:
2326         oob_paths.append(path)
2327
2328     if oob_paths:
2329       node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2330
2331     for instance in instancelist:
2332       inst_config = instanceinfo[instance]
2333
2334       for nname in inst_config.all_nodes:
2335         if nname not in node_image:
2336           # ghost node
2337           gnode = self.NodeImage(name=nname)
2338           gnode.ghost = True
2339           node_image[nname] = gnode
2340
2341       inst_config.MapLVsByNode(node_vol_should)
2342
2343       pnode = inst_config.primary_node
2344       node_image[pnode].pinst.append(instance)
2345
2346       for snode in inst_config.secondary_nodes:
2347         nimg = node_image[snode]
2348         nimg.sinst.append(instance)
2349         if pnode not in nimg.sbp:
2350           nimg.sbp[pnode] = []
2351         nimg.sbp[pnode].append(instance)
2352
2353     # At this point, we have the in-memory data structures complete,
2354     # except for the runtime information, which we'll gather next
2355
2356     # Due to the way our RPC system works, exact response times cannot be
2357     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2358     # time before and after executing the request, we can at least have a time
2359     # window.
2360     nvinfo_starttime = time.time()
2361     all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2362                                            self.cfg.GetClusterName())
2363     nvinfo_endtime = time.time()
2364
2365     all_drbd_map = self.cfg.ComputeDRBDMap()
2366
2367     feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
2368     instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)
2369
2370     feedback_fn("* Verifying configuration file consistency")
2371     self._VerifyFiles(_ErrorIf, nodeinfo, master_node, all_nvinfo, filemap)
2372
2373     feedback_fn("* Verifying node status")
2374
2375     refos_img = None
2376
2377     for node_i in nodeinfo:
2378       node = node_i.name
2379       nimg = node_image[node]
2380
2381       if node_i.offline:
2382         if verbose:
2383           feedback_fn("* Skipping offline node %s" % (node,))
2384         n_offline += 1
2385         continue
2386
2387       if node == master_node:
2388         ntype = "master"
2389       elif node_i.master_candidate:
2390         ntype = "master candidate"
2391       elif node_i.drained:
2392         ntype = "drained"
2393         n_drained += 1
2394       else:
2395         ntype = "regular"
2396       if verbose:
2397         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2398
2399       msg = all_nvinfo[node].fail_msg
2400       _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2401       if msg:
2402         nimg.rpc_fail = True
2403         continue
2404
2405       nresult = all_nvinfo[node].payload
2406
2407       nimg.call_ok = self._VerifyNode(node_i, nresult)
2408       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2409       self._VerifyNodeNetwork(node_i, nresult)
2410       self._VerifyOob(node_i, nresult)
2411
2412       if nimg.vm_capable:
2413         self._VerifyNodeLVM(node_i, nresult, vg_name)
2414         self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2415                              all_drbd_map)
2416
2417         self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2418         self._UpdateNodeInstances(node_i, nresult, nimg)
2419         self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2420         self._UpdateNodeOS(node_i, nresult, nimg)
2421         if not nimg.os_fail:
2422           if refos_img is None:
2423             refos_img = nimg
2424           self._VerifyNodeOS(node_i, nimg, refos_img)
2425
2426     feedback_fn("* Verifying instance status")
2427     for instance in instancelist:
2428       if verbose:
2429         feedback_fn("* Verifying instance %s" % instance)
2430       inst_config = instanceinfo[instance]
2431       self._VerifyInstance(instance, inst_config, node_image,
2432                            instdisk[instance])
2433       inst_nodes_offline = []
2434
2435       pnode = inst_config.primary_node
2436       pnode_img = node_image[pnode]
2437       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2438                self.ENODERPC, pnode, "instance %s, connection to"
2439                " primary node failed", instance)
2440
2441       _ErrorIf(inst_config.admin_up and pnode_img.offline,
2442                self.EINSTANCEBADNODE, instance,
2443                "instance is marked as running and lives on offline node %s",
2444                inst_config.primary_node)
2445
2446       # If the instance is non-redundant we cannot survive losing its primary
2447       # node, so we are not N+1 compliant. On the other hand we have no disk
2448       # templates with more than one secondary so that situation is not well
2449       # supported either.
2450       # FIXME: does not support file-backed instances
2451       if not inst_config.secondary_nodes:
2452         i_non_redundant.append(instance)
2453
2454       _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2455                instance, "instance has multiple secondary nodes: %s",
2456                utils.CommaJoin(inst_config.secondary_nodes),
2457                code=self.ETYPE_WARNING)
2458
2459       if inst_config.disk_template in constants.DTS_INT_MIRROR:
2460         pnode = inst_config.primary_node
2461         instance_nodes = utils.NiceSort(inst_config.all_nodes)
2462         instance_groups = {}
2463
2464         for node in instance_nodes:
2465           instance_groups.setdefault(nodeinfo_byname[node].group,
2466                                      []).append(node)
2467
2468         pretty_list = [
2469           "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2470           # Sort so that we always list the primary node first.
2471           for group, nodes in sorted(instance_groups.items(),
2472                                      key=lambda (_, nodes): pnode in nodes,
2473                                      reverse=True)]
2474
2475         self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2476                       instance, "instance has primary and secondary nodes in"
2477                       " different groups: %s", utils.CommaJoin(pretty_list),
2478                       code=self.ETYPE_WARNING)
2479
2480       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2481         i_non_a_balanced.append(instance)
2482
2483       for snode in inst_config.secondary_nodes:
2484         s_img = node_image[snode]
2485         _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2486                  "instance %s, connection to secondary node failed", instance)
2487
2488         if s_img.offline:
2489           inst_nodes_offline.append(snode)
2490
2491       # warn that the instance lives on offline nodes
2492       _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2493                "instance has offline secondary node(s) %s",
2494                utils.CommaJoin(inst_nodes_offline))
2495       # ... or ghost/non-vm_capable nodes
2496       for node in inst_config.all_nodes:
2497         _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2498                  "instance lives on ghost node %s", node)
2499         _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2500                  instance, "instance lives on non-vm_capable node %s", node)
2501
2502     feedback_fn("* Verifying orphan volumes")
2503     reserved = utils.FieldSet(*cluster.reserved_lvs)
2504     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2505
2506     feedback_fn("* Verifying orphan instances")
2507     self._VerifyOrphanInstances(instancelist, node_image)
2508
2509     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2510       feedback_fn("* Verifying N+1 Memory redundancy")
2511       self._VerifyNPlusOneMemory(node_image, instanceinfo)
2512
2513     feedback_fn("* Other Notes")
2514     if i_non_redundant:
2515       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2516                   % len(i_non_redundant))
2517
2518     if i_non_a_balanced:
2519       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2520                   % len(i_non_a_balanced))
2521
2522     if n_offline:
2523       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2524
2525     if n_drained:
2526       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2527
2528     return not self.bad
2529
2530   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2531     """Analyze the post-hooks' result
2532
2533     This method analyses the hook result, handles it, and sends some
2534     nicely-formatted feedback back to the user.
2535
2536     @param phase: one of L{constants.HOOKS_PHASE_POST} or
2537         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2538     @param hooks_results: the results of the multi-node hooks rpc call
2539     @param feedback_fn: function used send feedback back to the caller
2540     @param lu_result: previous Exec result
2541     @return: the new Exec result, based on the previous result
2542         and hook results
2543
2544     """
2545     # We only really run POST phase hooks, and are only interested in
2546     # their results
2547     if phase == constants.HOOKS_PHASE_POST:
2548       # Used to change hooks' output to proper indentation
2549       feedback_fn("* Hooks Results")
2550       assert hooks_results, "invalid result from hooks"
2551
2552       for node_name in hooks_results:
2553         res = hooks_results[node_name]
2554         msg = res.fail_msg
2555         test = msg and not res.offline
2556         self._ErrorIf(test, self.ENODEHOOKS, node_name,
2557                       "Communication failure in hooks execution: %s", msg)
2558         if res.offline or msg:
2559           # No need to investigate payload if node is offline or gave an error.
2560           # override manually lu_result here as _ErrorIf only
2561           # overrides self.bad
2562           lu_result = 1
2563           continue
2564         for script, hkr, output in res.payload:
2565           test = hkr == constants.HKR_FAIL
2566           self._ErrorIf(test, self.ENODEHOOKS, node_name,
2567                         "Script %s failed, output:", script)
2568           if test:
2569             output = self._HOOKS_INDENT_RE.sub('      ', output)
2570             feedback_fn("%s" % output)
2571             lu_result = 0
2572
2573       return lu_result
2574
2575
class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the locks needed: all nodes and all instances.

    The share_locks flag is set to 1 for every locking level.

    """
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      locking.LEVEL_INSTANCE: locking.ALL_SET,
    }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    res_nodes = {}
    res_instances = []
    res_missing = {}
    result = (res_nodes, res_instances, res_missing)

    nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
    all_instances = self.cfg.GetAllInstancesInfo().values()

    # Build {(node, vol): instance} for all instances with admin_up set
    nv_dict = {}
    for inst in all_instances:
      if not inst.admin_up:
        continue
      lvs_by_node = {}
      inst.MapLVsByNode(lvs_by_node)
      for node, vol_list in lvs_by_node.iteritems():
        for vol in vol_list:
          nv_dict[(node, vol)] = inst

    if not nv_dict:
      # nothing to check
      return result

    node_lvs = self.rpc.call_lv_list(nodes, [])
    for node, node_res in node_lvs.items():
      if node_res.offline:
        continue

      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      for lv_name, (_, _, lv_online) in node_res.payload.items():
        # every LV found on the node is removed from the expected set
        inst = nv_dict.pop((node, lv_name), None)
        if inst is None or lv_online:
          continue
        if inst.name not in res_instances:
          res_instances.append(inst.name)

    # whatever is still in nv_dict was not reported by any node, i.e. it
    # is a missing LV; group those (node, vol) keys per instance name
    for key, inst in nv_dict.iteritems():
      res_missing.setdefault(inst.name, []).append(key)

    return result
2642
2643
class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Compute the needed locks.

    Without an explicit instance list all nodes and instances are
    requested; otherwise only the given instances, with the node locks
    being recalculated later.

    """
    if not self.op.instances:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_NODE: locking.ALL_SET,
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        }
    else:
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
      self.needed_locks = {
        locking.LEVEL_NODE: [],
        locking.LEVEL_INSTANCE: self.wanted_names,
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def DeclareLocks(self, level):
    """Lock the primary nodes of the explicitly requested instances.

    """
    if level != locking.LEVEL_NODE or self.wanted_names is None:
      return
    self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      # no explicit list given, use all the instances we hold locks on
      self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)

    self.wanted_instances = [self.cfg.GetInstanceInfo(iname)
                             for iname in self.wanted_names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object
    @return: whether any child size was changed

    """
    if disk.dev_type != constants.LD_DRBD8:
      return False

    assert disk.children, "Empty children for DRBD8?"
    first_child = disk.children[0]
    too_small = first_child.size < disk.size
    if too_small:
      self.LogInfo("Child disk has size %d, parent %d, fixing",
                   first_child.size, disk.size)
      first_child.size = disk.size

    # and we recurse on this child only, not on the metadev
    return self._EnsureChildSizes(first_child) or too_small

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    @return: list of (instance name, disk index, new size) tuples, one
        for each correction written to the configuration

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    per_node_disks = {}
    for instance in self.wanted_instances:
      node_disks = per_node_disks.setdefault(instance.primary_node, [])
      for idx, disk in enumerate(instance.disks):
        node_disks.append((instance, idx, disk))

    changed = []
    for node, dskl in per_node_disks.items():
      # query the node using copies, so that setting the disk IDs does
      # not touch the configuration objects
      disk_copies = [entry[2].Copy() for entry in dskl]
      for dcopy in disk_copies:
        self.cfg.SetDiskID(dcopy, node)
      result = self.rpc.call_blockdev_getsize(node, disk_copies)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue
      if len(result.payload) != len(dskl):
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
                        " result.payload=%s", node, len(dskl), result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue
      for ((instance, idx, disk), size) in zip(dskl, result.payload):
        if size is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, instance.name)
          continue
        if not isinstance(size, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, instance.name)
          continue
        # the reported value is shifted right by 20 bits before being
        # compared with the recorded size
        actual = size >> 20
        if actual != disk.size:
          self.LogInfo("Disk %d of instance %s has mismatched size,"
                       " correcting: recorded %d, actual %d", idx,
                       instance.name, disk.size, actual)
          disk.size = actual
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, actual))
        if self._EnsureChildSizes(disk):
          self.cfg.Update(instance, feedback_fn)
          changed.append((instance.name, idx, disk.size))
    return changed
2756
2757
class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {}
    env["OP_TARGET"] = self.cfg.GetClusterName()
    env["NEW_NAME"] = self.op.name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    master_only = [self.cfg.GetMasterNode()]
    all_nodes = self.cfg.GetNodeList()
    return (master_only, all_nodes)

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    The name is resolved and replaced in the opcode by the resolved
    name; the resolved IP address is stored in C{self.ip}.

    """
    hostname = netutils.GetHostname(name=self.op.name,
                                    family=self.cfg.GetPrimaryIPFamily())

    new_name = hostname.name
    new_ip = hostname.ip
    self.ip = new_ip
    old_name = self.cfg.GetClusterName()
    old_ip = self.cfg.GetMasterIP()

    ip_changed = new_ip != old_ip
    if not ip_changed and new_name == old_name:
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)

    # a new IP address must not answer on the network yet
    if ip_changed and netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                 " reachable on the network" %
                                 new_ip, errors.ECODE_NOTUNIQUE)

    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    @return: the new cluster name

    """
    new_name = self.op.name
    new_ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    stop_result = self.rpc.call_node_stop_master(master, False)
    stop_result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = new_name
      cluster.master_ip = new_ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      targets = self.cfg.GetOnlineNodeList()
      if master in targets:
        targets.remove(master)
      _UploadHelper(self, targets, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      # the master role is started again even if the rename failed
      start_result = self.rpc.call_node_start_master(master, False, False)
      msg = start_result.fail_msg
      if msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s", msg)

    return new_name
2837
2838
2839 class LUClusterSetParams(LogicalUnit):
2840   """Change the parameters of the cluster.
2841
2842   """
2843   HPATH = "cluster-modify"
2844   HTYPE = constants.HTYPE_CLUSTER
2845   REQ_BGL = False
2846
2847   def CheckArguments(self):
2848     """Check parameters
2849
2850     """
2851     if self.op.uid_pool:
2852       uidpool.CheckUidPool(self.op.uid_pool)
2853
2854     if self.op.add_uids:
2855       uidpool.CheckUidPool(self.op.add_uids)
2856
2857     if self.op.remove_uids:
2858       uidpool.CheckUidPool(self.op.remove_uids)
2859
2860   def ExpandNames(self):
2861     # FIXME: in the future maybe other cluster params won't require checking on
2862     # all nodes to be modified.
2863     self.needed_locks = {
2864       locking.LEVEL_NODE: locking.ALL_SET,
2865     }
2866     self.share_locks[locking.LEVEL_NODE] = 1
2867
2868   def BuildHooksEnv(self):
2869     """Build hooks env.
2870
2871     """
2872     return {
2873       "OP_TARGET": self.cfg.GetClusterName(),
2874       "NEW_VG_NAME": self.op.vg_name,
2875       }
2876
2877   def BuildHooksNodes(self):
2878     """Build hooks nodes.
2879
2880     """
2881     mn = self.cfg.GetMasterNode()
2882     return ([mn], [mn])
2883
2884   def CheckPrereq(self):
2885     """Check prerequisites.
2886
2887     This checks whether the given params don't conflict and
2888     if the given volume group is valid.
2889
2890     """
2891     if self.op.vg_name is not None and not self.op.vg_name:
2892       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2893         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2894                                    " instances exist", errors.ECODE_INVAL)
2895
2896     if self.op.drbd_helper is not None and not self.op.drbd_helper:
2897       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2898         raise errors.OpPrereqError("Cannot disable drbd helper while"
2899                                    " drbd-based instances exist",
2900                                    errors.ECODE_INVAL)
2901
2902     node_list = self.glm.list_owned(locking.LEVEL_NODE)
2903
2904     # if vg_name not None, checks given volume group on all nodes
2905     if self.op.vg_name:
2906       vglist = self.rpc.call_vg_list(node_list)
2907       for node in node_list:
2908         msg = vglist[node].fail_msg
2909         if msg:
2910           # ignoring down node
2911           self.LogWarning("Error while gathering data on node %s"
2912                           " (ignoring node): %s", node, msg)
2913           continue
2914         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2915                                               self.op.vg_name,
2916                                               constants.MIN_VG_SIZE)
2917         if vgstatus:
2918           raise errors.OpPrereqError("Error on node '%s': %s" %
2919                                      (node, vgstatus), errors.ECODE_ENVIRON)
2920
2921     if self.op.drbd_helper:
2922       # checks given drbd helper on all nodes
2923       helpers = self.rpc.call_drbd_helper(node_list)
2924       for node in node_list:
2925         ninfo = self.cfg.GetNodeInfo(node)
2926         if ninfo.offline:
2927           self.LogInfo("Not checking drbd helper on offline node %s", node)
2928           continue
2929         msg = helpers[node].fail_msg
2930         if msg:
2931           raise errors.OpPrereqError("Error checking drbd helper on node"
2932                                      " '%s': %s" % (node, msg),
2933                                      errors.ECODE_ENVIRON)
2934         node_helper = helpers[node].payload
2935         if node_helper != self.op.drbd_helper:
2936           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2937                                      (node, node_helper), errors.ECODE_ENVIRON)
2938
2939     self.cluster = cluster = self.cfg.GetClusterInfo()
2940     # validate params changes
2941     if self.op.beparams:
2942       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2943       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2944
2945     if self.op.ndparams:
2946       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
2947       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
2948
2949       # TODO: we need a more general way to handle resetting
2950       # cluster-level parameters to default values
2951       if self.new_ndparams["oob_program"] == "":
2952         self.new_ndparams["oob_program"] = \
2953             constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
2954
2955     if self.op.nicparams:
2956       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2957       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2958       objects.NIC.CheckParameterSyntax(self.new_nicparams)
2959       nic_errors = []
2960
2961       # check all instances for consistency
2962       for instance in self.cfg.GetAllInstancesInfo().values():
2963         for nic_idx, nic in enumerate(instance.nics):
2964           params_copy = copy.deepcopy(nic.nicparams)
2965           params_filled = objects.FillDict(self.new_nicparams, params_copy)
2966
2967           # check parameter syntax
2968           try:
2969             objects.NIC.CheckParameterSyntax(params_filled)
2970           except errors.ConfigurationError, err:
2971             nic_errors.append("Instance %s, nic/%d: %s" %
2972                               (instance.name, nic_idx, err))
2973
2974           # if we're moving instances to routed, check that they have an ip
2975           target_mode = params_filled[constants.NIC_MODE]
2976           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2977             nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2978                               (instance.name, nic_idx))
2979       if nic_errors:
2980         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2981                                    "\n".join(nic_errors))
2982
2983     # hypervisor list/parameters
2984     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2985     if self.op.hvparams:
2986       for hv_name, hv_dict in self.op.hvparams.items():
2987         if hv_name not in self.new_hvparams:
2988           self.new_hvparams[hv_name] = hv_dict
2989         else:
2990           self.new_hvparams[hv_name].update(hv_dict)
2991
2992     # os hypervisor parameters
2993     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2994     if self.op.os_hvp:
2995       for os_name, hvs in self.op.os_hvp.items():
2996         if os_name not in self.new_os_hvp:
2997           self.new_os_hvp[os_name] = hvs
2998         else:
2999           for hv_name, hv_dict in hvs.items():
3000             if hv_name not in self.new_os_hvp[os_name]:
3001               self.new_os_hvp[os_name][hv_name] = hv_dict
3002             else:
3003               self.new_os_hvp[os_name][hv_name].update(hv_dict)
3004
3005     # os parameters
3006     self.new_osp = objects.FillDict(cluster.osparams, {})
3007     if self.op.osparams:
3008       for os_name, osp in self.op.osparams.items():
3009         if os_name not in self.new_osp:
3010           self.new_osp[os_name] = {}
3011
3012         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3013                                                   use_none=True)
3014
3015         if not self.new_osp[os_name]:
3016           # we removed all parameters
3017           del self.new_osp[os_name]
3018         else:
3019           # check the parameter validity (remote check)
3020           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3021                          os_name, self.new_osp[os_name])
3022
3023     # changes to the hypervisor list
3024     if self.op.enabled_hypervisors is not None:
3025       self.hv_list = self.op.enabled_hypervisors
3026       for hv in self.hv_list:
3027         # if the hypervisor doesn't already exist in the cluster
3028         # hvparams, we initialize it to empty, and then (in both
3029         # cases) we make sure to fill the defaults, as we might not
3030         # have a complete defaults list if the hypervisor wasn't
3031         # enabled before
3032         if hv not in new_hvp:
3033           new_hvp[hv] = {}
3034         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3035         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3036     else:
3037       self.hv_list = cluster.enabled_hypervisors
3038
3039     if self.op.hvparams or self.op.enabled_hypervisors is not None:
3040       # either the enabled list has changed, or the parameters have, validate
3041       for hv_name, hv_params in self.new_hvparams.items():
3042         if ((self.op.hvparams and hv_name in self.op.hvparams) or
3043             (self.op.enabled_hypervisors and
3044              hv_name in self.op.enabled_hypervisors)):
3045           # either this is a new hypervisor, or its parameters have changed
3046           hv_class = hypervisor.GetHypervisor(hv_name)
3047           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3048           hv_class.CheckParameterSyntax(hv_params)
3049           _CheckHVParams(self, node_list, hv_name, hv_params)
3050
3051     if self.op.os_hvp:
3052       # no need to check any newly-enabled hypervisors, since the
3053       # defaults have already been checked in the above code-block
3054       for os_name, os_hvp in self.new_os_hvp.items():
3055         for hv_name, hv_params in os_hvp.items():
3056           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3057           # we need to fill in the new os_hvp on top of the actual hv_p
3058           cluster_defaults = self.new_hvparams.get(hv_name, {})
3059           new_osp = objects.FillDict(cluster_defaults, hv_params)
3060           hv_class = hypervisor.GetHypervisor(hv_name)
3061           hv_class.CheckParameterSyntax(new_osp)
3062           _CheckHVParams(self, node_list, hv_name, new_osp)
3063
3064     if self.op.default_iallocator:
3065       alloc_script = utils.FindFile(self.op.default_iallocator,
3066                                     constants.IALLOCATOR_SEARCH_PATH,
3067                                     os.path.isfile)
3068       if alloc_script is None:
3069         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3070                                    " specified" % self.op.default_iallocator,
3071                                    errors.ECODE_INVAL)
3072
3073   def Exec(self, feedback_fn):
3074     """Change the parameters of the cluster.
3075
3076     """
3077     if self.op.vg_name is not None:
3078       new_volume = self.op.vg_name
3079       if not new_volume:
3080         new_volume = None
3081       if new_volume != self.cfg.GetVGName():
3082         self.cfg.SetVGName(new_volume)
3083       else:
3084         feedback_fn("Cluster LVM configuration already in desired"
3085                     " state, not changing")
3086     if self.op.drbd_helper is not None:
3087       new_helper = self.op.drbd_helper
3088       if not new_helper:
3089         new_helper = None
3090       if new_helper != self.cfg.GetDRBDHelper():
3091         self.cfg.SetDRBDHelper(new_helper)
3092       else:
3093         feedback_fn("Cluster DRBD helper already in desired state,"
3094                     " not changing")
3095     if self.op.hvparams:
3096       self.cluster.hvparams = self.new_hvparams
3097     if self.op.os_hvp:
3098       self.cluster.os_hvp = self.new_os_hvp
3099     if self.op.enabled_hypervisors is not None:
3100       self.cluster.hvparams = self.new_hvparams
3101       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3102     if self.op.beparams:
3103       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3104     if self.op.nicparams:
3105       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3106     if self.op.osparams:
3107       self.cluster.osparams = self.new_osp
3108     if self.op.ndparams:
3109       self.cluster.ndparams = self.new_ndparams
3110
3111     if self.op.candidate_pool_size is not None:
3112       self.cluster.candidate_pool_size = self.op.candidate_pool_size
3113       # we need to update the pool size here, otherwise the save will fail
3114       _AdjustCandidatePool(self, [])
3115
3116     if self.op.maintain_node_health is not None:
3117       self.cluster.maintain_node_health = self.op.maintain_node_health
3118
3119     if self.op.prealloc_wipe_disks is not None:
3120       self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3121
3122     if self.op.add_uids is not None:
3123       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3124
3125     if self.op.remove_uids is not None:
3126       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3127
3128     if self.op.uid_pool is not None:
3129       self.cluster.uid_pool = self.op.uid_pool
3130
3131     if self.op.default_iallocator is not None:
3132       self.cluster.default_iallocator = self.op.default_iallocator
3133
3134     if self.op.reserved_lvs is not None:
3135       self.cluster.reserved_lvs = self.op.reserved_lvs
3136
3137     def helper_os(aname, mods, desc):
3138       desc += " OS list"
3139       lst = getattr(self.cluster, aname)
3140       for key, val in mods:
3141         if key == constants.DDM_ADD:
3142           if val in lst:
3143             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3144           else:
3145             lst.append(val)
3146         elif key == constants.DDM_REMOVE:
3147           if val in lst:
3148             lst.remove(val)
3149           else:
3150             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3151         else:
3152           raise errors.ProgrammerError("Invalid modification '%s'" % key)
3153
3154     if self.op.hidden_os:
3155       helper_os("hidden_os", self.op.hidden_os, "hidden")
3156
3157     if self.op.blacklisted_os:
3158       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3159
3160     if self.op.master_netdev:
3161       master = self.cfg.GetMasterNode()
3162       feedback_fn("Shutting down master ip on the current netdev (%s)" %
3163                   self.cluster.master_netdev)
3164       result = self.rpc.call_node_stop_master(master, False)
3165       result.Raise("Could not disable the master ip")
3166       feedback_fn("Changing master_netdev from %s to %s" %
3167                   (self.cluster.master_netdev, self.op.master_netdev))
3168       self.cluster.master_netdev = self.op.master_netdev
3169
3170     self.cfg.Update(self.cluster, feedback_fn)
3171
3172     if self.op.master_netdev:
3173       feedback_fn("Starting the master ip on the new master netdev (%s)" %
3174                   self.op.master_netdev)
3175       result = self.rpc.call_node_start_master(master, False, False)
3176       if result.fail_msg:
3177         self.LogWarning("Could not re-enable the master ip on"
3178                         " the master, please restart manually: %s",
3179                         result.fail_msg)
3180
3181
3182 def _UploadHelper(lu, nodes, fname):
3183   """Helper for uploading a file and showing warnings.
3184
3185   """
3186   if os.path.exists(fname):
3187     result = lu.rpc.call_upload_file(nodes, fname)
3188     for to_node, to_result in result.items():
3189       msg = to_result.fail_msg
3190       if msg:
3191         msg = ("Copy of file %s to node %s failed: %s" %
3192                (fname, to_node, msg))
3193         lu.proc.LogWarning(msg)
3194
3195
def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @param cluster: the cluster object; its modify_etc_hosts and
      enabled_hypervisors attributes are used
  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed
  @rtype: tuple of four sets
  @return: (files for all nodes, files which must exist on all nodes or
      on none, files for master candidates only, files for VM-capable
      nodes only)

  """
  # Files for all nodes
  files_all = set([
    constants.SSH_KNOWN_HOSTS_FILE,
    constants.CONFD_HMAC_KEY,
    constants.CLUSTER_DOMAIN_SECRET_FILE,
    ])

  # Files which should only be on master candidates
  files_mc = set()

  if not redist:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())
    files_mc.add(constants.CLUSTER_CONF_FILE)

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  # Files which must either exist on all nodes or on none
  files_all_opt = set([
    constants.RAPI_USERS_FILE,
    ])

  # Files which should only be on VM-capable nodes
  files_vm = set()
  for hv_name in cluster.enabled_hypervisors:
    files_vm.update(hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())

  # Filenames must be unique: if no file is in more than one set, the
  # size of the union equals the sum of the individual sizes
  file_sets = (files_all, files_all_opt, files_mc, files_vm)
  expected_total = sum([len(fset) for fset in file_sets])
  all_files = files_all | files_all_opt | files_mc | files_vm
  assert len(all_files) == expected_total, \
         "Found file listed in more than one file list"

  return file_sets
3238
3239
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This function
  makes sure those are copied to every online node (and, for the
  hypervisor-specific files, to every VM-capable node), the master excluded.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  cfg = lu.cfg

  # Collect the candidate target nodes
  cluster = cfg.GetClusterInfo()
  master_name = cfg.GetNodeInfo(cfg.GetMasterNode()).name

  online_nodes = cfg.GetOnlineNodeList()
  vm_nodes = cfg.GetVmCapableNodeList()

  if additional_nodes is not None:
    online_nodes.extend(additional_nodes)
    if additional_vm:
      vm_nodes.extend(additional_nodes)

  # The master is the source of the files, hence never a target
  for targets in (online_nodes, vm_nodes):
    try:
      targets.remove(master_name)
    except ValueError:
      pass

  # Only the files flagged for redistribution are of interest here
  (files_all, files_all_opt, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # The configuration file is never re-distributed from here
  assert (constants.CLUSTER_CONF_FILE not in files_all and
          constants.CLUSTER_CONF_FILE not in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  distribution = (
    (online_nodes, files_all),
    (online_nodes, files_all_opt),
    (vm_nodes, files_vm),
    )

  for (targets, filenames) in distribution:
    for filename in filenames:
      _UploadHelper(lu, targets, filename)
3289
3290
class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  This is a very simple LU: it re-saves the cluster object through the
  configuration writer and then pushes the ancillary files.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare a shared lock on all nodes.

    """
    self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    self.share_locks[locking.LEVEL_NODE] = 1

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    @param feedback_fn: feedback function handed to the configuration update

    """
    cluster = self.cfg.GetClusterInfo()
    self.cfg.Update(cluster, feedback_fn)
    _RedistributeAncillaryFiles(self)
3311
3312
def _WaitForSync(lu, instance, disks=None, oneshot=False):
  """Sleep and poll for an instance's disk to sync.

  @param lu: calling logical unit
  @param instance: instance whose disks are checked; the mirror status is
      queried on its primary node
  @param disks: disks to wait for, handed to L{_ExpandCheckDisks}; an empty
      value other than None makes the function return at once
  @param oneshot: if true, do not wait for the sync to finish (only the short
      retries done while disks look degraded are performed)
  @rtype: boolean
  @return: False if the last poll saw a disk which is degraded without a
      resync being in progress, True otherwise

  """
  if not instance.disks:
    return True
  if disks is not None and not disks:
    return True

  disks = _ExpandCheckDisks(instance, disks)

  if not oneshot:
    lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)

  node = instance.primary_node

  for dev in disks:
    lu.cfg.SetDiskID(dev, node)

  # TODO: Convert to utils.Retry

  rpc_failures = 0
  degr_retries = 10 # in seconds, as we sleep 1 second each time
  while True:
    max_time = 0
    done = True
    cumul_degraded = False
    rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
    failure = rstats.fail_msg
    if failure:
      lu.LogWarning("Can't get any data from node %s: %s", node, failure)
      rpc_failures += 1
      if rpc_failures >= 10:
        raise errors.RemoteError("Can't contact node %s for mirror data,"
                                 " aborting." % node)
      time.sleep(6)
      continue

    # A successful call resets the count of consecutive RPC failures
    rpc_failures = 0

    for (idx, mstat) in enumerate(rstats.payload):
      if mstat is None:
        lu.LogWarning("Can't compute data for node %s/%s",
                           node, disks[idx].iv_name)
        continue

      syncing = mstat.sync_percent is not None

      # Degraded disks only count when they are not being resynced
      if mstat.is_degraded and not syncing:
        cumul_degraded = True

      if not syncing:
        continue

      done = False
      if mstat.estimated_time is None:
        rem_time = "no time estimate"
      else:
        rem_time = ("%s remaining (estimated)" %
                    utils.FormatSeconds(mstat.estimated_time))
        max_time = mstat.estimated_time
      lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
                      (disks[idx].iv_name, mstat.sync_percent, rem_time))

    finished = done or oneshot

    # if we're done but degraded, let's do a few small retries, to
    # make sure we see a stable and not transient situation; therefore
    # we force restart of the loop
    if finished and cumul_degraded and degr_retries > 0:
      logging.info("Degraded disks found, %d retries left", degr_retries)
      degr_retries -= 1
      time.sleep(1)
      continue

    if finished:
      break

    # Poll again after the estimated time, but at least once a minute
    time.sleep(min(60, max_time))

  if done:
    lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
  return not cumul_degraded
3386
3387
def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
  """Check that mirrors are not degraded.

  The ldisk parameter, if True, will change the test from the
  is_degraded attribute (which represents overall non-ok status for
  the device(s)) to the ldisk (representing the local storage status).
  Child devices are always checked via their is_degraded attribute, as
  the ldisk flag is not passed down.

  @param lu: calling logical unit
  @param dev: disk to check, together with its children
  @param node: node on which the disk is looked up
  @param on_primary: whether C{node} is the primary node; on other nodes
      the device is only queried if C{dev.AssembleOnSecondary()} says so
  @param ldisk: whether to test the local disk status instead of the
      degraded flag
  @rtype: boolean
  @return: True if the device and all its children are consistent

  """
  lu.cfg.SetDiskID(dev, node)

  consistent = True

  if on_primary or dev.AssembleOnSecondary():
    rstats = lu.rpc.call_blockdev_find(node, dev)
    failure = rstats.fail_msg
    if failure:
      lu.LogWarning("Can't find disk on node %s: %s", node, failure)
      consistent = False
    elif not rstats.payload:
      lu.LogWarning("Can't find disk on node %s", node)
      consistent = False
    elif ldisk:
      consistent = rstats.payload.ldisk_status == constants.LDS_OKAY
    else:
      consistent = not rstats.payload.is_degraded

  # Children are only looked at as long as everything was fine so far
  for child in dev.children or []:
    if not consistent:
      break
    consistent = _CheckDiskConsistency(lu, child, node, on_primary)

  return consistent
3420
3421
3422 class LUOobCommand(NoHooksLU):
3423   """Logical unit for OOB handling.
3424
3425   """
3426   REG_BGL = False
3427   _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3428
3429   def CheckPrereq(self):
3430     """Check prerequisites.
3431
3432     This checks:
3433      - the node exists in the configuration
3434      - OOB is supported
3435
3436     Any errors are signaled by raising errors.OpPrereqError.
3437
3438     """
3439     self.nodes = []
3440     self.master_node = self.cfg.GetMasterNode()
3441
3442     assert self.op.power_delay >= 0.0
3443
3444     if self.op.node_names:
3445       if (self.op.command in self._SKIP_MASTER and
3446           self.master_node in self.op.node_names):
3447         master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3448         master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3449
3450         if master_oob_handler:
3451           additional_text = ("run '%s %s %s' if you want to operate on the"
3452                              " master regardless") % (master_oob_handler,
3453                                                       self.op.command,
3454                                                       self.master_node)
3455         else:
3456           additional_text = "it does not support out-of-band operations"
3457
3458         raise errors.OpPrereqError(("Operating on the master node %s is not"
3459                                     " allowed for %s; %s") %
3460                                    (self.master_node, self.op.command,
3461                                     additional_text), errors.ECODE_INVAL)
3462     else:
3463       self.op.node_names = self.cfg.GetNodeList()
3464       if self.op.command in self._SKIP_MASTER:
3465         self.op.node_names.remove(self.master_node)
3466
3467     if self.op.command in self._SKIP_MASTER:
3468       assert self.master_node not in self.op.node_names
3469
3470     for node_name in self.op.node_names:
3471       node = self.cfg.GetNodeInfo(node_name)
3472
3473       if node is None:
3474         raise errors.OpPrereqError("Node %s not found" % node_name,
3475                                    errors.ECODE_NOENT)
3476       else:
3477         self.nodes.append(node)
3478
3479       if (not self.op.ignore_status and
3480           (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3481         raise errors.OpPrereqError(("Cannot power off node %s because it is"
3482                                     " not marked offline") % node_name,
3483                                    errors.ECODE_STATE)
3484
3485   def ExpandNames(self):
3486     """Gather locks we need.
3487
3488     """
3489     if self.op.node_names:
3490       self.op.node_names = _GetWantedNodes(self, self.op.node_names)
3491       lock_names = self.op.node_names
3492     else:
3493       lock_names = locking.ALL_SET
3494
3495     self.needed_locks = {
3496       locking.LEVEL_NODE: lock_names,
3497       }
3498
3499   def Exec(self, feedback_fn):
3500     """Execute OOB and return result if we expect any.
3501
3502     """
3503     master_node = self.master_node
3504     ret = []
3505
3506     for idx, node in enumerate(self.nodes):
3507       node_entry = [(constants.RS_NORMAL, node.name)]
3508       ret.append(node_entry)
3509
3510       oob_program = _SupportsOob(self.cfg, node)
3511
3512       if not oob_program:
3513         node_entry.append((constants.RS_UNAVAIL, None))
3514         continue
3515
3516       logging.info("Executing out-of-band command '%s' using '%s' on %s",
3517                    self.op.command, oob_program, node.name)
3518       result = self.rpc.call_run_oob(master_node, oob_program,
3519                                      self.op.command, node.name,
3520                                      self.op.timeout)
3521
3522       if result.fail_msg:
3523         self.LogWarning("Out-of-band RPC failed on node '%s': %s",
3524                         node.name, result.fail_msg)
3525         node_entry.append((constants.RS_NODATA, None))
3526       else:
3527         try:
3528           self._CheckPayload(result)
3529         except errors.OpExecError, err:
3530           self.LogWarning("Payload returned by node '%s' is not valid: %s",
3531                           node.name, err)
3532           node_entry.append((constants.RS_NODATA, None))
3533         else:
3534           if self.op.command == constants.OOB_HEALTH:
3535             # For health we should log important events
3536             for item, status in result.payload:
3537               if status in [constants.OOB_STATUS_WARNING,
3538                             constants.OOB_STATUS_CRITICAL]:
3539                 self.LogWarning("Item '%s' on node '%s' has status '%s'",
3540                                 item, node.name, status)
3541
3542           if self.op.command == constants.OOB_POWER_ON:
3543             node.powered = True
3544           elif self.op.command == constants.OOB_POWER_OFF:
3545             node.powered = False
3546           elif self.op.command == constants.OOB_POWER_STATUS:
3547             powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3548             if powered != node.powered:
3549               logging.warning(("Recorded power state (%s) of node '%s' does not"
3550                                " match actual power state (%s)"), node.powered,
3551                               node.name, powered)
3552
3553           # For configuration changing commands we should update the node
3554           if self.op.command in (constants.OOB_POWER_ON,
3555                                  constants.OOB_POWER_OFF):
3556             self.cfg.Update(node, feedback_fn)
3557
3558           node_entry.append((constants.RS_NORMAL, result.payload))
3559
3560           if (self.op.command == constants.OOB_POWER_ON and
3561               idx < len(self.nodes) - 1):
3562             time.sleep(self.op.power_delay)
3563
3564     return ret
3565
3566   def _CheckPayload(self, result):
3567     """Checks if the payload is valid.
3568
3569     @param result: RPC result
3570     @raises errors.OpExecError: If payload is not valid
3571
3572     """
3573     errs = []
3574     if self.op.command == constants.OOB_HEALTH:
3575       if not isinstance(result.payload, list):
3576         errs.append("command 'health' is expected to return a list but got %s" %
3577                     type(result.payload))
3578       else:
3579         for item, status in result.payload:
3580           if status not in constants.OOB_STATUSES:
3581             errs.append("health item '%s' has invalid status '%s'" %
3582                         (item, status))
3583
3584     if self.op.command == constants.OOB_POWER_STATUS:
3585       if not isinstance(result.payload, dict):
3586         errs.append("power-status is expected to return a dict but got %s" %
3587                     type(result.payload))
3588
3589     if self.op.command in [
3590         constants.OOB_POWER_ON,
3591         constants.OOB_POWER_OFF,
3592         constants.OOB_POWER_CYCLE,
3593         ]:
3594       if result.payload is not None:
3595         errs.append("%s is expected to not return payload but got '%s'" %
3596                     (self.op.command, result.payload))
3597
3598     if errs:
3599       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3600                                utils.CommaJoin(errs))
3601
class _OsQuery(_QueryBase):
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    """Set up the (currently empty) lock declaration and the wanted names.

    """
    # Lock all nodes in shared mode
    # Temporary removal of locks, should be reverted later
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}
    #self.share_locks[locking.LEVEL_NODE] = 1
    #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # The following variables interact with _QueryBase._GetNames
    self.wanted = self.names or locking.ALL_SET
    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and lists of tuples of (path, status,
        diagnose, variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], [], []),
                                     (/srv/..., False, "invalid api",
                                      [], [], [])],
                           "node2": [(/srv/..., True, "", [], [], [])]}
          }

    """
    # we build here the list of nodes that didn't fail the RPC (at RPC
    # level), so that nodes with a non-responding node daemon don't
    # make all OSes invalid
    good_nodes = [node_name for (node_name, nres) in rlist.items()
                  if not nres.fail_msg]

    all_os = {}
    for (node_name, nres) in rlist.items():
      if nres.fail_msg or not nres.payload:
        continue
      for os_entry in nres.payload:
        (name, path, status, diagnose, variants,
         params, api_versions) = os_entry
        per_node = all_os.get(name)
        if per_node is None:
          # first time this OS is seen: start with an empty list for each
          # node which answered the RPC
          per_node = dict((nname, []) for nname in good_nodes)
          all_os[name] = per_node
        # convert params from [name, help] to (name, help)
        param_tuples = [tuple(v) for v in params]
        per_node[node_name].append((path, status, diagnose,
                                    variants, param_tuples, api_versions))
    return all_os

  def _GetQueryData(self, lu):
    """Computes the list of OSes and their attributes.

    Only nodes which are not offline and are VM-capable are queried. An OS
    is flagged valid only if every node in its status map has at least one
    entry for it and the first entry's status is true; variants, parameters
    and API versions are reduced to the values common to the nodes examined.

    """
    # Locking is not used
    assert not (compat.any(lu.glm.is_owned(level)
                           for level in locking.LEVELS
                           if level != locking.LEVEL_CLUSTER) or
                self.do_locking or self.use_locking)

    valid_nodes = []
    for node in lu.cfg.GetAllNodesInfo().values():
      if not node.offline and node.vm_capable:
        valid_nodes.append(node.name)

    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))

      variants = set()
      parameters = set()
      api_versions = set()

      first_node = True
      for osl in os_data.values():
        info.valid = bool(info.valid and osl and osl[0][1])
        if not info.valid:
          break

        (node_variants, node_params, node_api) = osl[0][3:6]
        if first_node:
          # First entry: take its values as the starting point
          variants.update(node_variants)
          parameters.update(node_params)
          api_versions.update(node_api)
          first_node = False
        else:
          # Filter out inconsistent values
          variants.intersection_update(node_variants)
          parameters.intersection_update(node_params)
          api_versions.intersection_update(node_api)

      info.variants = list(variants)
      info.parameters = list(parameters)
      info.api_versions = list(api_versions)

      data[os_name] = info

    # Prepare data in requested order
    ordered_names = self._GetNames(lu, pol.keys(), None)
    return [data[name] for name in ordered_names if name in data]
3717
3718
class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    @param fields: requested output fields
    @param names: OS names to restrict the query to
    @return: the name filter, the status filter, both joined by "and", or
        None if neither applies

    """
    name_filter = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    conditions = []
    for fname in ("hidden", "blacklisted"):
      if fname not in fields:
        conditions.append([qlang.OP_NOT, [qlang.OP_TRUE, fname]])
    if "valid" not in fields:
      conditions.append([qlang.OP_TRUE, "valid"])

    if conditions:
      status_filter = [qlang.OP_AND] + conditions
    else:
      status_filter = None

    if not name_filter:
      return status_filter
    if status_filter:
      return [qlang.OP_AND, name_filter, status_filter]
    return name_filter

  def CheckArguments(self):
    """Create the OS query object from the opcode parameters.

    """
    os_filter = self._BuildFilter(self.op.output_fields, self.op.names)
    self.oq = _OsQuery(os_filter, self.op.output_fields, False)

  def ExpandNames(self):
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.oq.OldStyleQuery(self)
3761
3762
class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    This doesn't run on the target node in the pre phase as a failed
    node would then be impossible to remove.

    """
    target = self.op.node_name
    return {
      "OP_TARGET": target,
      "NODE_NAME": target,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: all configured nodes except the one being removed, for both
        elements of the returned tuple

    """
    hook_nodes = self.cfg.GetNodeList()
    if self.op.node_name in hook_nodes:
      hook_nodes.remove(self.op.node_name)
    else:
      logging.warning("Node '%s', which is about to be removed, was not found"
                      " in the list of all nodes", self.op.node_name)
    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    Any errors are signaled by raising errors.OpPrereqError.

    """
    expanded_name = _ExpandNodeName(self.cfg, self.op.node_name)
    self.op.node_name = expanded_name
    node = self.cfg.GetNodeInfo(expanded_name)
    assert node is not None

    instance_names = self.cfg.GetInstanceList()

    if node.name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for iname in instance_names:
      if node.name in self.cfg.GetInstanceInfo(iname).all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % iname,
                                   errors.ECODE_INVAL)

    self.op.node_name = node.name
    self.node = node

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    """
    node = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 node.name)

    # Read before the node is dropped from the configuration
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[node.name])
    self.context.RemoveNode(node.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, node.name)

    leave_result = self.rpc.call_node_leave_cluster(node.name,
                                                    modify_ssh_setup)
    leave_error = leave_result.fail_msg
    if leave_error:
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", leave_error)

    if not self.cfg.GetClusterInfo().modify_etc_hosts:
      return

    # Remove node from our /etc/hosts
    master_node = self.cfg.GetMasterNode()
    hosts_result = self.rpc.call_etc_hosts_modify(master_node,
                                                  constants.ETC_HOSTS_REMOVE,
                                                  node.name, None)
    hosts_result.Raise("Can't update hosts file with new host data")
    _RedistributeAncillaryFiles(self)
3856
3857
class _NodeQuery(_QueryBase):
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    """Compute the wanted node names and declare locks if needed.

    Node locks are only requested when locking was asked for and live
    data is part of the query.

    """
    lu.needed_locks = {}
    lu.share_locks[locking.LEVEL_NODE] = 1

    if not self.names:
      self.wanted = locking.ALL_SET
    else:
      self.wanted = _GetWantedNodes(lu, self.names)

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if not self.do_locking:
      return

    # if we don't request only static fields, we need to lock the nodes
    lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    Each kind of optional data (live, instance, OOB, group) is only
    gathered if it is part of the requested data.

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Live data, from the vm_capable nodes only
    live_data = None
    if query.NQ_LIVE in self.requested_data:
      toquery_nodes = []
      for name in nodenames:
        if all_info[name].vm_capable:
          toquery_nodes.append(name)

      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
                                        lu.cfg.GetHypervisorType())
      # Nodes with a failed call or an empty payload are left out
      live_data = dict((name, nresult.payload)
                       for (name, nresult) in node_data.items()
                       if not nresult.fail_msg and nresult.payload)

    # Instance names per node, split by primary/secondary role
    node_to_primary = None
    node_to_secondary = None
    if query.NQ_INST in self.requested_data:
      node_to_primary = dict([(name, set()) for name in nodenames])
      node_to_secondary = dict([(name, set()) for name in nodenames])

      for inst in lu.cfg.GetAllInstancesInfo().values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    oob_support = None
    if query.NQ_OOB in self.requested_data:
      oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
                         for name, node in all_info.iteritems())

    groups = {}
    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()

    wanted_nodes = [all_info[name] for name in nodenames]
    return query.NodeQueryData(wanted_nodes,
                               live_data, lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())
3932
3933
class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  All the work is delegated to a L{_NodeQuery} object.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Create the node query object from the opcode parameters.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.nq = _NodeQuery(name_filter, self.op.output_fields,
                         self.op.use_locking)

  def ExpandNames(self):
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    return self.nq.OldStyleQuery(self)
3950
3951
class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    """Validate the requested output fields.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Declare shared locks on the requested nodes, or on all of them.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of volumes on the locked nodes.

    @rtype: list
    @return: one list per volume, holding the requested fields converted
        to strings; offline nodes and nodes whose RPC call failed
        contribute no rows

    """
    nodenames = self.glm.list_owned(locking.LEVEL_NODE)
    volumes = self.rpc.call_node_volumes(nodenames)

    ilist = [self.cfg.GetInstanceInfo(iname) for iname
             in self.cfg.GetInstanceList()]

    lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])

    def _OwnerOf(node, vol):
      """Return the name of the instance using the volume, or a dash.

      """
      for inst in ilist:
        inst_lvs = lv_by_node[inst]
        if node not in inst_lvs:
          continue
        if vol['name'] in inst_lvs[node]:
          return inst.name
      return '-'

    output = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      failure = nresult.fail_msg
      if failure:
        self.LogWarning("Can't compute volume data on node %s: %s",
                        node, failure)
        continue

      # Rows of a node are ordered by physical device
      for vol in sorted(nresult.payload, key=lambda entry: entry['dev']):
        row = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            val = _OwnerOf(node, vol)
          else:
            raise errors.ParameterError(field)
          row.append(str(val))

        output.append(row)

    return output
4028
4029
class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    """Validate the requested output fields.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Declare shared locks on the requested nodes, or on all of them.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of storage units and their attributes.

    @rtype: list
    @return: one list per storage unit with the requested fields, ordered
        by node and then by unit name; offline nodes and nodes whose RPC
        call failed contribute no rows

    """
    self.nodes = self.glm.list_owned(locking.LEVEL_NODE)

    # Always get name to sort by
    fields = list(self.op.output_fields)
    if constants.SF_NAME not in fields:
      fields.insert(0, constants.SF_NAME)

    # Never ask for node or type as it's only known to the LU
    lu_only = (constants.SF_NODE, constants.SF_TYPE)
    fields = [name for name in fields if name not in lu_only]

    field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      failure = nresult.fail_msg
      if failure:
        self.LogWarning("Can't get storage data from node %s: %s",
                        node, failure)
        continue

      # Index the rows by unit name, so they can be emitted in sorted order
      rows = {}
      for row in nresult.payload:
        rows[row[name_idx]] = row

      for unit_name in utils.NiceSort(rows.keys()):
        row = rows[unit_name]

        out = []
        for field in self.op.output_fields:
          if field == constants.SF_NODE:
            val = node
          elif field == constants.SF_TYPE:
            val = self.op.storage_type
          elif field in field_idx:
            val = row[field_idx[field]]
          else:
            raise errors.ParameterError(field)

          out.append(val)

        result.append(out)

    return result
4111
4112
class _InstanceQuery(_QueryBase):
  """Query implementation for instances.

  """
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    """Computes the wanted instances and declares the locks on C{lu}.

    Locks (always shared) are only requested if locking is enabled for
    this query and live data was requested.

    @param lu: the logical unit on whose behalf the query runs

    """
    lu.needed_locks = {}
    lu.share_locks[locking.LEVEL_INSTANCE] = 1
    lu.share_locks[locking.LEVEL_NODE] = 1

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      # The node locks are computed in DeclareLocks, once the instance
      # locks are held
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, lu, level):
    """Declares the node locks for the locked instances, if locking.

    @param lu: the logical unit on whose behalf the query runs
    @param level: the locking level currently being processed

    """
    if level == locking.LEVEL_NODE and self.do_locking:
      lu._LockInstancesNodes() # pylint: disable-msg=W0212

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    Depending on C{self.requested_data}, live data (via RPC), disk
    usage and console information are gathered in addition to the
    configuration data.

    @param lu: the logical unit on whose behalf the query runs
    @return: a L{query.InstanceQueryData} object

    """
    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      live_data = {}
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          # The payload of a node is merged as a whole as soon as one of
          # the reported instances has this node as its primary; this is
          # done once after the loop instead of once per such instance.
          # NOTE(review): the merged data therefore also contains entries
          # for wrong-node and orphan instances reported by the same
          # node -- confirm that consumers rely on this
          merge_payload = False
          for inst in result.payload:
            if inst in all_info:
              if all_info[inst].primary_node == name:
                merge_payload = True
              else:
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
              # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
          if merge_payload:
            live_data.update(result.payload)
        # else no instance is alive
    else:
      live_data = {}

    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{constants.IDISK_SIZE: disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    # Reuse the cluster object fetched above instead of asking the
    # configuration for it a second time
    return query.InstanceQueryData(instance_list, cluster,
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo)
4206
4207
class LUQuery(NoHooksLU):
  """Generic query LU, delegating to a per-resource implementation.

  The implementation class is selected through C{self.op.what}; all
  locking and the query itself are forwarded to it.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Instantiates the query implementation for the requested resource.

    """
    impl_class = _GetQueryImplementation(self.op.what)
    # NOTE(review): the last argument is presumably the "use locking"
    # flag of the query base class -- confirm against _QueryBase
    self.impl = impl_class(self.op.filter, self.op.fields, False)

  def ExpandNames(self):
    """Forwards name expansion to the query implementation.

    """
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    """Forwards lock declaration to the query implementation.

    """
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Runs the query and returns its result.

    """
    return self.impl.NewStyleQuery(self)
4228
4229
class LUQueryFields(NoHooksLU):
  """Query for the fields available for a certain kind of resource.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Looks up the query implementation class for C{self.op.what}.

    """
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    """Declares that no locks are needed.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Returns the result of L{query.QueryFields} for the wanted fields.

    """
    field_defs = self.qcls.FIELDS
    return query.QueryFields(field_defs, self.op.fields)
4245
4246
class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Expands the node name and verifies the requested changes.

    @raise errors.OpPrereqError: if the storage type has no modifiable
        fields or a non-modifiable field is to be changed

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    stype = self.op.storage_type

    if stype not in constants.MODIFIABLE_STORAGE_FIELDS:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % stype,
                                 errors.ECODE_INVAL)

    allowed = constants.MODIFIABLE_STORAGE_FIELDS[stype]
    rejected = set(self.op.changes.keys()) - allowed
    if rejected:
      raise errors.OpPrereqError("The following fields can not be modified for"
                                 " storage units of type '%s': %r" %
                                 (stype, list(rejected)),
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Declares the lock on the node holding the storage unit.

    """
    self.needed_locks = {}
    self.needed_locks[locking.LEVEL_NODE] = self.op.node_name

  def Exec(self, feedback_fn):
    """Applies the requested changes to the storage unit via RPC.

    """
    target_node = self.op.node_name
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    rpc_result = self.rpc.call_storage_modify(target_node,
                                              self.op.storage_type, st_args,
                                              self.op.name, self.op.changes)
    rpc_result.Raise("Failed to modify storage unit '%s' on %s" %
                     (self.op.name, target_node))
4287
4288
class LUNodeAdd(LogicalUnit):
  """Logical unit for adding node to the cluster.

  The same LU also handles re-adding a node which is already part of the
  configuration (C{self.op.readd}).

  @cvar _NFLAGS: names of the capability flags which are copied from the
      opcode (or defaulted) onto the node object

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    """Resolves the node name and checks the re-add related arguments.

    The node name in the opcode is replaced by the name returned by the
    resolver; re-adding the master node or passing a node group together
    with a re-add is refused.

    """
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name

    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Cannot readd the master node",
                                 errors.ECODE_STATE)

    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of (pre-hook nodes, post-hook nodes); the node being
        added is only part of the post-hook list

    """
    # Exclude added node
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
    post_nodes = pre_nodes + [self.op.node_name, ]

    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config
     - it is resolvable
     - its parameters (single/dual homed) matches the cluster

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    # The primary IP is always the resolved one, whatever the opcode said
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      # No secondary IP given: the node is single-homed
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    # Compare the IP addresses against those of all known nodes
    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      if self.op.readd and node == existing_node_name:
        # On re-add the secondary IP must be unchanged, while a changed
        # primary IP is only recorded (and applied in Exec)
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      # Unset flags are inherited from the existing node object
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      # Unset flags default to True for new nodes
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    # A re-added node is passed as an exception to the self-promotion
    # decision below
    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      # Re-use (and later modify) the existing node object
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    The node object is updated from the opcode, the node's protocol
    version and its ssh/hostname setup are verified, and finally the node
    is added (or re-added) to the configuration via the context.

    @raise errors.OpExecError: on protocol version mismatch or if the
        ssh/hostname verification fails

    """
    new_node = self.new_node
    node = new_node.name

    # We are adding a new node so we assume it's powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # Add node to our /etc/hosts, and add key to known_hosts
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    # Let the master verify the new node (node list check only)
    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      # A non-empty payload is treated as failure; all entries are
      # reported before aborting
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())
4561
4562
class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]

  def CheckArguments(self):
    """Checks the opcode arguments and decides which locks are needed.

    Sets C{self.might_demote}, C{self.lock_all} and C{self.lock_instances}
    for use by the later phases.

    @raise errors.OpPrereqError: if no modification was passed, more than
        one value is set to True, or the secondary IP is not a valid
        IPv4 address

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    # NOTE(review): this count also covers the master_capable/vm_capable
    # flags, not only the three role flags; CheckPrereq seems to rely on
    # that (auto-promotion together with drained/offline would otherwise
    # trip its assertion) -- confirm before narrowing the check
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def ExpandNames(self):
    """Declares the node lock(s) and, if needed, all instance locks.

    """
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Computes the affected instances and releases unneeded locks.

    The affected instances (internally mirrored instances using the
    node) are always computed when instance locks were requested, as
    CheckPrereq depends on them. The locks of all other instances are
    only released if not all nodes are being locked.

    """
    # If we have locked all instances, before waiting to lock nodes, release
    # all the ones living on nodes unrelated to the current operation.
    if level == locking.LEVEL_NODE and self.lock_instances:
      self.affected_instances = []
      instances_keep = []

      # Build list of instances to keep; this must also happen when all
      # nodes are locked, otherwise the secondary IP checks in CheckPrereq
      # would silently operate on an empty list
      for instance_name in self.glm.list_owned(locking.LEVEL_INSTANCE):
        instance = self.context.cfg.GetInstanceInfo(instance_name)
        if (instance.disk_template in constants.DTS_INT_MIRROR and
            self.op.node_name in instance.all_nodes):
          instances_keep.append(instance_name)
          self.affected_instances.append(instance)

      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
        _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)

        assert (set(self.glm.list_owned(locking.LEVEL_INSTANCE)) ==
                set(instances_keep))

  def BuildHooksEnv(self):
    """Build hooks env.

    The environment contains the target node and the requested flag
    values, converted to strings.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: tuple of (pre-hook nodes, post-hook nodes); both are the
        master node plus the node being modified

    """
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This verifies that the requested flag, capability, power state and
    secondary IP changes are possible for the node, and computes the old
    and new role of the node (C{self.old_role}, C{self.new_role}).

    Any errors are signaled by raising errors.OpPrereqError.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion", errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered == True):
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
                                    " offline status can be reset") %
                                   self.op.node_name, errors.ECODE_STATE)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " as it does not support out-of-band"
                                  " handling") % self.op.node_name,
                                 errors.ECODE_STATE)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      if node.offline:
        if self.affected_instances:
          # Report the instance names, not the instance objects
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
                                     " node has instances (%s) configured"
                                     " to use it" %
                                     ", ".join(inst.name for inst in
                                               self.affected_instances),
                                     errors.ECODE_STATE)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in self.affected_instances:
          _CheckInstanceDown(self, instance, "cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def Exec(self, feedback_fn):
    """Modifies a node.

    @return: a list of (parameter name, new value) pairs describing the
        changes made to the capability flags, role flags and secondary IP

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      # Record every role flag whose value differs from the old one
      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result
4859
4860
class LUNodePowercycle(NoHooksLU):
  """Powercycles a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Expand the node name and guard the master node.

    @raise errors.OpPrereqError: if the target is the master node and
        the force parameter was not set

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    targets_master = (self.op.node_name == self.cfg.GetMasterNode())
    if targets_master and not self.op.force:
      raise errors.OpPrereqError("The node is the master and the force"
                                 " parameter was not set",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    Powercycling is a last-resort option which must not block on other
    jobs, hence no locks are requested.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    @return: the payload of the powercycle RPC result

    """
    hv_name = self.cfg.GetHypervisorType()
    powercycle = self.rpc.call_node_powercycle(self.op.node_name, hv_name)
    powercycle.Raise("Failed to schedule the reboot")
    return powercycle.payload
4891
4892
class LUClusterQuery(NoHooksLU):
  """Query cluster configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """This LU requests no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    @rtype: dict
    @return: version constants and cluster-wide settings keyed by name;
        hypervisor parameters are restricted to the enabled hypervisors

    """
    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors

    # Per-OS hypervisor parameters, keeping only the enabled hypervisors
    os_hvp = {}
    for (os_name, hv_dict) in cluster.os_hvp.items():
      os_hvp[os_name] = dict([(hv_name, hv_params)
                              for (hv_name, hv_params) in hv_dict.items()
                              if hv_name in enabled_hvs])

    # Translate the address family into an IP version
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION
    else:
      primary_ip_version = constants.IP4_VERSION

    return {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": enabled_hvs[0],
      "enabled_hypervisors": enabled_hvs,
      "hvparams": dict([(hv_name, cluster.hvparams[hv_name])
                        for hv_name in enabled_hvs]),
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }
4960
4961
class LUClusterConfigQuery(NoHooksLU):
  """Return configuration values.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    """Check the selected output fields against the declared ones.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """This LU requests no locks.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Compute the values of the requested fields.

    @rtype: list
    @return: one value per entry of the opcode's output_fields, in the
        same order
    @raise errors.ParameterError: for a field not handled here

    """
    # Each supported field maps to a callable computing its value; the
    # callables only run for fields which were actually requested
    getters = {
      "cluster_name": self.cfg.GetClusterName,
      "master_node": self.cfg.GetMasterNode,
      "drain_flag":
        lambda: os.path.exists(constants.JOB_QUEUE_DRAIN_FILE),
      "watcher_pause":
        lambda: utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE),
      "volume_group_name": self.cfg.GetVGName,
      }

    values = []
    for field in self.op.output_fields:
      if field not in getters:
        raise errors.ParameterError(field)
      values.append(getters[field]())
    return values
4999
5000
class LUInstanceActivateDisks(NoHooksLU):
  """Bring up an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Set up instance locking and defer the node locks.

    The node level starts with an empty lock list which is marked for
    replacement when the locks are recalculated.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Fill in the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and runs the online
    check on its primary node.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    @return: the device information computed by
        L{_AssembleInstanceDisks}
    @raise errors.OpExecError: if the disks could not be assembled

    """
    (success, device_info) = \
      _AssembleInstanceDisks(self, self.instance,
                             ignore_size=self.op.ignore_size)
    if success:
      return device_info

    raise errors.OpExecError("Cannot activate block devices")
5038
5039
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes: a first pass assembles
  every device in secondary mode on all nodes, a second pass assembles
  the devices in primary mode on the primary node only.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors during the first
      (secondary mode) pass won't result in an error return from the
      function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @rtype: tuple
  @return: (disks_ok, device_info); disks_ok is False if a non-ignored
      assembly error occurred, device_info is a list of
      (host, instance_visible_name, node_visible_name) tuples
      with the mapping from node devices to instance devices; the
      last element is None for a disk whose primary assembly failed

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        # work on a copy so that the disk object itself keeps its size
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    # stays None if the assembly on the primary node fails
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        # failures on the primary are never ignored
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info
5126
5127
def _StartInstanceDisks(lu, instance, force):
  """Start the disks of an instance.

  If the disks cannot be assembled they are shut down again and an
  error is raised.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks are started
  @param force: passed as ignore_secondaries to the disk assembly; when
      it is None no hint about '--force' is given on failure
  @raise errors.OpExecError: if the disk assembly reported a failure

  """
  (assembled, _) = _AssembleInstanceDisks(lu, instance,
                                          ignore_secondaries=force)
  if assembled:
    return

  _ShutdownInstanceDisks(lu, instance)
  hint_wanted = not (force is None or force)
  if hint_wanted:
    lu.proc.LogWarning("", hint="If the message above refers to a"
                       " secondary node,"
                       " you can retry the operation using '--force'.")
  raise errors.OpExecError("Disk consistency error")
5141
5142
class LUInstanceDeactivateDisks(NoHooksLU):
  """Shutdown an instance's disks.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Set up instance locking and defer the node locks.

    The node level starts with an empty lock list which is marked for
    replacement when the locks are recalculated.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Fill in the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks.

    With the force parameter the disks are shut down directly,
    otherwise the shutdown goes through
    L{_SafeShutdownInstanceDisks}.

    """
    if not self.op.force:
      _SafeShutdownInstanceDisks(self, self.instance)
    else:
      _ShutdownInstanceDisks(self, self.instance)
5177
5178
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance, after a safety check.

  L{_CheckInstanceDown} is called first; only afterwards the disks are
  handed to L{_ShutdownInstanceDisks}.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks are shut down
  @param disks: the disks to shut down, passed on unchanged

  """
  failure_reason = "cannot shutdown disks"
  _CheckInstanceDown(lu, instance, failure_reason)
  _ShutdownInstanceDisks(lu, instance, disks=disks)
5188
5189
def _ExpandCheckDisks(instance, disks):
  """Return the instance disks selected by the disks list

  @param instance: object whose C{disks} attribute holds all its disks
  @type disks: list of L{objects.Disk} or None
  @param disks: selected disks, or None to select all instance disks
  @rtype: list of L{objects.Disk}
  @return: selected instance disks to act on
  @raise errors.ProgrammerError: if a selected disk is not one of the
      instance's disks

  """
  if disks is None:
    return instance.disks

  if set(disks).issubset(instance.disks):
    return disks

  raise errors.ProgrammerError("Can only act on disks belonging to the"
                               " target instance")
5206
5207
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  Every failure is logged as a warning. A failure on the primary node
  makes the function return False unless ignore_primary is true; a
  failure on any other node makes it return False unless the RPC
  result is flagged as offline.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks are shut down
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to shut down (or all, if None)
  @type ignore_primary: boolean
  @param ignore_primary: if true, errors on the primary node do not
      affect the return value
  @rtype: boolean
  @return: False if a non-ignored shutdown error occurred, True
      otherwise

  """
  all_result = True
  primary = instance.primary_node

  for disk in _ExpandCheckDisks(instance, disks):
    for node, top_disk in disk.ComputeNodeTree(primary):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if not msg:
        continue

      lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                    disk.iv_name, node, msg)
      if node == primary:
        # primary node errors count unless explicitly ignored
        if not ignore_primary:
          all_result = False
      elif not result.offline:
        # errors on other nodes count unless the result is marked offline
        all_result = False

  return all_result
5232
5233
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Checks if a node has enough free memory.

  The node is queried for its memory statistics; if the query fails,
  the reported value is not an integer, or less memory than requested
  is free, an OpPrereqError exception is raised.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  node_result = lu.rpc.call_node_info([node], None, hypervisor_name)[node]
  node_result.Raise("Can't get data from node %s" % node,
                    prereq=True, ecode=errors.ECODE_ENVIRON)

  free_mem = node_result.payload.get("memory_free", None)
  if not isinstance(free_mem, int):
    text = ("Can't compute free memory on node %s, result was '%s'" %
            (node, free_mem))
    raise errors.OpPrereqError(text, errors.ECODE_ENVIRON)

  if requested > free_mem:
    text = ("Not enough memory on node %s for %s: needed %s MiB,"
            " available %s MiB" % (node, reason, requested, free_mem))
    raise errors.OpPrereqError(text, errors.ECODE_NORES)
5269
5270
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Checks if nodes have enough free disk space in all the VGs.

  Each (volume group, size) pair of req_sizes is verified on all given
  nodes through L{_CheckNodesFreeDiskOnVG}.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for (vg_name, needed_mib) in req_sizes.items():
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg_name, needed_mib)
5292
5293
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Checks if nodes have enough free disk space in the specified VG.

  All nodes are queried with a single call; the first node for which
  the query failed, the reported free space is not an integer, or the
  free space is below the requested amount triggers an OpPrereqError
  exception.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  per_node = lu.rpc.call_node_info(nodenames, vg, None)

  for name in nodenames:
    node_result = per_node[name]
    node_result.Raise("Cannot get current information from node %s" % name,
                      prereq=True, ecode=errors.ECODE_ENVIRON)

    vg_free = node_result.payload.get("vg_free", None)
    if not isinstance(vg_free, int):
      text = ("Can't compute free disk space on node %s for vg %s,"
              " result was '%s'" % (name, vg, vg_free))
      raise errors.OpPrereqError(text, errors.ECODE_ENVIRON)

    if requested > vg_free:
      text = ("Not enough disk space on target node %s vg %s:"
              " required %d MiB, available %d MiB" %
              (name, vg, requested, vg_free))
      raise errors.OpPrereqError(text, errors.ECODE_NORES)
5329
5330
class LUInstanceStartup(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Type-check the extra backend parameters, if any were given.

    """
    extra_beparams = self.op.beparams
    if extra_beparams:
      utils.ForceDictType(extra_beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    """Set up locking through L{_ExpandAndLockInstance}.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {"FORCE": self.op.force}
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: the same list (master node followed by all instance nodes)
        for both elements of the tuple

    """
    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return (nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, validates extra
    hypervisor parameters and, unless an offline primary node is being
    ignored, verifies the primary node, the bridges and the free memory.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hypervisor parameters are checked locally first
    if self.op.hvparams:
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      merged_hvp = cluster.FillHV(instance)
      merged_hvp.update(self.op.hvparams)
      hv_class = hypervisor.GetHypervisor(instance.hypervisor)
      hv_class.CheckParameterSyntax(merged_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor,
                     merged_hvp)

    pnode = instance.primary_node
    self.primary_offline = self.cfg.GetNodeInfo(pnode).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")
      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
      return

    _CheckNodeOnline(self, pnode)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

    remote_info = self.rpc.call_instance_info(pnode, instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % pnode,
                      prereq=True, ecode=errors.ECODE_ENVIRON)
    if remote_info.payload:
      # a payload means the instance is running already
      return

    _CheckNodeFreeMemory(self, pnode,
                         "starting instance %s" % instance.name,
                         bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    The instance is marked as up in the configuration first; with an
    offline primary node nothing else is done.

    @raise errors.OpExecError: if the start RPC reports a failure

    """
    instance = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
      return

    pnode = instance.primary_node
    _StartInstanceDisks(self, instance, force)

    start_result = self.rpc.call_instance_start(pnode, instance,
                                                self.op.hvparams,
                                                self.op.beparams)
    start_error = start_result.fail_msg
    if start_error:
      # do not leave the disks active for an instance which did not start
      _ShutdownInstanceDisks(self, instance)
      raise errors.OpExecError("Could not start instance: %s" % start_error)
5438
5439
class LUInstanceReboot(LogicalUnit):
  """Reboot an instance.

  A running instance asked for a soft or hard reboot is rebooted through
  a single RPC; in every other case the instance is (if running) shut
  down and then started again together with its disks.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Set up locking through L{_ExpandAndLockInstance}.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: the same list (master node followed by all instance nodes)
        for both elements of the tuple

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, then runs the
    online check on its primary node and the bridge existence check.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    @raise errors.OpExecError: if the instance cannot be started again
        during a full reboot

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    # a non-empty payload is taken to mean that the instance is running
    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node)
    instance_running = bool(remote_info.payload)

    node_current = instance.primary_node

    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                                            constants.INSTANCE_REBOOT_HARD]:
      # soft/hard reboot of a running instance: one RPC to the primary
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      # full reboot, or the instance is not running: stop (if needed)
      # and start again, cycling the disks as well
      if instance_running:
        result = self.rpc.call_instance_shutdown(node_current, instance,
                                                 self.op.shutdown_timeout)
        result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)
      else:
        self.LogInfo("Instance %s was already stopped, starting now",
                     instance.name)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance, None, None)
      msg = result.fail_msg
      if msg:
        # do not leave the disks active if the instance failed to start
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    # reached only if no error was raised above
    self.cfg.MarkInstanceUp(instance.name)
5531
5532
class LUInstanceShutdown(LogicalUnit):
  """Shutdown an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Set up locking through L{_ExpandAndLockInstance}.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    hooks_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hooks_env["TIMEOUT"] = self.op.timeout
    return hooks_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: the same list (master node followed by all instance nodes)
        for both elements of the tuple

    """
    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return (nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and, unless an
    offline primary node is being ignored, runs the online check on the
    primary node.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    pnode = instance.primary_node
    self.primary_offline = self.cfg.GetNodeInfo(pnode).offline

    if not (self.primary_offline and self.op.ignore_offline_nodes):
      _CheckNodeOnline(self, pnode)
    else:
      self.proc.LogWarning("Ignoring offline primary node")

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    The instance is marked as down in the configuration first; with an
    offline primary node nothing else is done. A failed shutdown RPC is
    only logged as a warning and the disks are shut down regardless.

    """
    instance = self.instance
    pnode = instance.primary_node
    timeout = self.op.timeout

    self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
      return

    stop_result = self.rpc.call_instance_shutdown(pnode, instance, timeout)
    stop_error = stop_result.fail_msg
    if stop_error:
      self.proc.LogWarning("Could not shutdown instance: %s" % stop_error)

    _ShutdownInstanceDisks(self, instance)
5599
5600
class LUInstanceReinstall(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Set up locking through L{_ExpandAndLockInstance}.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: the same list (master node followed by all instance nodes)
        for both elements of the tuple

    """
    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return (nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.
    Additionally all its nodes must pass the online check, it must not
    be diskless, and a new OS and new OS parameters (if given) are
    verified.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node,
                     "Instance primary node offline, cannot reinstall")
    for snode in instance.secondary_nodes:
      _CheckNodeOnline(self, snode,
                       "Instance secondary node offline, cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")

    if self.op.os_type is None:
      instance_os = instance.os
    else:
      # a new OS has to be available on the primary node
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type

    nodelist = list(instance.all_nodes)

    if not self.op.osparams:
      self.os_inst = None
    else:
      new_osparams = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, new_osparams)
      # the new dict (without defaults)
      self.os_inst = new_osparams

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    A changed OS is written to the configuration first; the OS create
    scripts then run with the disks started, and the disks are shut
    down again whether or not the installation succeeded.

    """
    inst = self.instance
    new_os = self.op.os_type

    if new_os is not None:
      feedback_fn("Changing OS to '%s'..." % new_os)
      inst.os = new_os
      # Write to configuration
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      add_result = self.rpc.call_instance_os_add(inst.primary_node, inst,
                                                 True, self.op.debug_level,
                                                 osparams=self.os_inst)
      add_result.Raise("Could not install OS for instance %s on node %s" %
                       (inst.name, inst.primary_node))
    finally:
      _ShutdownInstanceDisks(self, inst)
5690
5691
class LUInstanceRecreateDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  The opcode may list the indices of the disks to recreate; an empty
  list means all disks of the instance.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance name and declare the instance lock.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The environment is the standard per-instance one.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The same list (master node followed by all nodes of the instance)
    is returned for both elements of the tuple.

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, has disks, is not
    running and that every requested disk index refers to an existing
    disk.

    @raise errors.OpPrereqError: if the instance is diskless or a disk
        index is out of range

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot recreate disks")

    disk_count = len(instance.disks)
    if not self.op.disks:
      # No explicit selection: recreate every disk
      self.op.disks = range(disk_count)
    else:
      for idx in self.op.disks:
        # Negative indices must be rejected as well: Exec matches the
        # requested indices against 0..disk_count-1, so a negative value
        # would otherwise silently select nothing
        if idx < 0 or idx >= disk_count:
          raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
                                     errors.ECODE_INVAL)

    self.instance = instance

  def Exec(self, feedback_fn):
    """Recreate the disks.

    Every disk whose index was not requested is passed to
    L{_CreateDisks} in its C{to_skip} argument.

    """
    to_skip = [idx for idx, _ in enumerate(self.instance.disks)
               if idx not in self.op.disks]

    _CreateDisks(self, self.instance, to_skip=to_skip)
5755
5756
class LUInstanceRename(LogicalUnit):
  """Rename an instance.

  This LU does not override C{REQ_BGL}; L{Exec} asserts that it is set
  because the instance lock is swapped there.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Reject an IP check that is not accompanied by a name check.

    """
    if not self.op.ip_check:
      return
    if self.op.name_check:
      return
    # TODO: make the ip check more flexible and not depend on the name check
    raise errors.OpPrereqError("IP address check requires a name check",
                               errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    The standard instance environment plus C{INSTANCE_NEW_NAME}.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["INSTANCE_NEW_NAME"] = self.op.new_name
    return hook_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The same list (master node followed by all nodes of the instance)
    is used for both elements of the returned tuple.

    """
    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return (nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    The instance must exist, its primary node must be online and the
    instance must be down. With C{name_check} the new name is resolved
    and replaced by the resolved name; with C{ip_check} the resolved IP
    must not answer on the node daemon port. Finally the new name must
    not belong to another instance.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None
    _CheckNodeOnline(self, inst.primary_node)
    _CheckInstanceDown(self, inst, "cannot rename")
    self.instance = inst

    wanted = self.op.new_name
    if self.op.name_check:
      resolved = netutils.GetHostname(name=wanted)
      self.LogInfo("Resolved given name '%s' to '%s'", wanted,
                   resolved.name)
      matches = utils.MatchNameComponent(self.op.new_name, [resolved.name])
      if not matches:
        text = ("Resolved hostname '%s' does not look the"
                " same as given hostname '%s'")
        raise errors.OpPrereqError(text % (resolved.name, self.op.new_name),
                                   errors.ECODE_INVAL)
      # From here on the fully resolved name is the new name
      self.op.new_name = resolved.name
      wanted = resolved.name
      if self.op.ip_check:
        if netutils.TcpPing(resolved.ip, constants.DEFAULT_NODED_PORT):
          raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                     (resolved.ip, wanted),
                                     errors.ECODE_NOTUNIQUE)

    known_instances = self.cfg.GetInstanceList()
    if wanted != inst.name and wanted in known_instances:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 wanted, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Rename the instance.

    The configuration is updated first, then the instance lock is
    replaced. For file-based disk templates the storage directory is
    renamed on the primary node, and finally the OS rename script is
    run. A failure of the rename script is only logged as a warning.

    @return: the name of the instance after the rename

    """
    previous = self.instance
    old_name = previous.name
    new_name = self.op.new_name

    # Remember the old storage directory before the config is changed;
    # None means no directory rename is needed
    old_dir = None
    file_templates = (constants.DT_FILE, constants.DT_SHARED_FILE)
    if previous.disk_template in file_templates and new_name != old_name:
      old_dir = os.path.dirname(previous.disks[0].logical_id[1])

    self.cfg.RenameInstance(old_name, new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL.
    # Otherwise the new lock would have to be added in acquired mode.
    assert self.REQ_BGL
    self.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.glm.add(locking.LEVEL_INSTANCE, new_name)

    # re-read the instance from the configuration after rename
    renamed = self.cfg.GetInstanceInfo(new_name)

    if old_dir is not None:
      new_dir = os.path.dirname(renamed.disks[0].logical_id[1])
      dir_result = self.rpc.call_file_storage_dir_rename(renamed.primary_node,
                                                         old_dir, new_dir)
      dir_result.Raise("Could not rename on node %s directory '%s' to '%s'"
                       " (but the instance has been renamed in Ganeti)" %
                       (renamed.primary_node, old_dir, new_dir))

    _StartInstanceDisks(self, renamed, None)
    try:
      script_result = self.rpc.call_instance_run_rename(renamed.primary_node,
                                                        renamed, old_name,
                                                        self.op.debug_level)
      failure = script_result.fail_msg
      if failure:
        text = ("Could not run OS rename script for instance %s on node %s"
                " (but the instance has been renamed in Ganeti): %s" %
                (renamed.name, renamed.primary_node, failure))
        self.proc.LogWarning(text)
    finally:
      _ShutdownInstanceDisks(self, renamed)

    return renamed.name
5873
5874
class LUInstanceRemove(LogicalUnit):
  """Remove an instance.

  The instance is shut down on its primary node and then removed via
  L{_RemoveInstance}.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; node locks are filled in by L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Compute the node locks from the locked instance.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The standard instance environment plus C{SHUTDOWN_TIMEOUT}.

    """
    hook_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    return hook_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The first element contains only the master node; the second one
    all nodes of the instance followed by the master node.

    """
    master_only = [self.cfg.GetMasterNode()]
    post_nodes = list(self.instance.all_nodes)
    post_nodes.extend(master_only)
    return (master_only, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    found = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = found
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    A failed shutdown aborts the operation unless the opcode asks to
    ignore failures, in which case only a warning is sent.

    @raise errors.OpExecError: if the shutdown fails and failures are
        not ignored

    """
    inst = self.instance
    pnode = inst.primary_node
    logging.info("Shutting down instance %s on node %s", inst.name, pnode)

    shutdown = self.rpc.call_instance_shutdown(pnode, inst,
                                               self.op.shutdown_timeout)
    failure = shutdown.fail_msg
    ignore = self.op.ignore_failures
    if failure and not ignore:
      raise errors.OpExecError("Could not shutdown instance %s on"
                               " node %s: %s" %
                               (inst.name, inst.primary_node, failure))
    if failure:
      feedback_fn("Warning: can't shutdown instance: %s" % failure)

    _RemoveInstance(self, feedback_fn, inst, self.op.ignore_failures)
5940
5941
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Remove an instance's disks and its configuration entry.

  @param lu: the logical unit on whose behalf we execute
  @param feedback_fn: function used to send warnings to the caller
  @param instance: the instance object to remove
  @param ignore_failures: if true, a failed disk removal only produces
      a warning instead of aborting
  @raise errors.OpExecError: if the disks cannot be removed and
      C{ignore_failures} is false

  """
  name = instance.name
  logging.info("Removing block devices for instance %s", name)

  disks_removed = _RemoveDisks(lu, instance)
  if not disks_removed:
    if ignore_failures:
      feedback_fn("Warning: can't remove instance's disks")
    else:
      raise errors.OpExecError("Can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", name)

  lu.cfg.RemoveInstance(name)

  pending_removal = lu.remove_locks.get(locking.LEVEL_INSTANCE)
  assert not pending_removal, "Instance lock removal conflict"

  # Schedule the instance lock for removal
  lu.remove_locks[locking.LEVEL_INSTANCE] = name
5962
5963
class LUInstanceQuery(NoHooksLU):
  """Logical unit for querying instances.

  All work is delegated to an L{_InstanceQuery} object built from the
  opcode parameters.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Create the query object from the opcode's names and fields.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.iq = _InstanceQuery(name_filter, self.op.output_fields,
                             self.op.use_locking)

  def ExpandNames(self):
    """Let the query object declare the needed names.

    """
    query_obj = self.iq
    query_obj.ExpandNames(self)

  def DeclareLocks(self, level):
    """Let the query object declare the locks for C{level}.

    """
    query_obj = self.iq
    query_obj.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Run the query and return its old-style result.

    """
    query_obj = self.iq
    return query_obj.OldStyleQuery(self)
5983
5984
class LUInstanceFailover(LogicalUnit):
  """Failover an instance.

  The actual work is delegated to a L{TLMigrateInstance} tasklet that
  is created with C{failover=True}.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Copy the optional opcode parameters onto the LU.

    Both attributes default to C{None} when the opcode lacks them.

    """
    opcode = self.op
    self.iallocator = getattr(opcode, "iallocator", None)
    self.target_node = getattr(opcode, "target_node", None)

  def ExpandNames(self):
    """Lock the instance and create the failover tasklet.

    The node lock list is left empty here and filled in by
    L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()

    requested_target = self.op.target_node
    if requested_target is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, requested_target)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    consistency_flag = self.op.ignore_consistency
    timeout = self.op.shutdown_timeout
    migrater = TLMigrateInstance(self, self.op.instance_name,
                                 cleanup=False,
                                 failover=True,
                                 ignore_consistency=consistency_flag,
                                 shutdown_timeout=timeout)
    self._migrater = migrater
    self.tasklets = [migrater]

  def DeclareLocks(self, level):
    """Declare the node locks.

    For externally mirrored disk templates either all nodes or the
    primary and the requested target node are locked, and the lock
    recalculation entry is dropped. Otherwise the instance's own nodes
    are locked.

    """
    if level != locking.LEVEL_NODE:
      return

    inst = self.context.cfg.GetInstanceInfo(self.op.instance_name)
    if inst.disk_template not in constants.DTS_EXT_MIRROR:
      self._LockInstancesNodes()
      return

    if self.op.target_node is None:
      wanted = locking.ALL_SET
    else:
      wanted = [inst.primary_node, self.op.target_node]
    self.needed_locks[locking.LEVEL_NODE] = wanted
    del self.recalculate_locks[locking.LEVEL_NODE]

  def BuildHooksEnv(self):
    """Build hooks env.

    Failover-specific variables are set first and then updated with the
    standard instance environment. C{NEW_PRIMARY} is taken from the
    opcode's target node.

    """
    inst = self._migrater.instance
    old_primary = inst.primary_node
    hook_env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": old_primary,
      "NEW_PRIMARY": self.op.target_node,
      }

    if inst.disk_template not in constants.DTS_INT_MIRROR:
      # No secondary node is involved for these templates
      hook_env["OLD_SECONDARY"] = hook_env["NEW_SECONDARY"] = ""
    else:
      hook_env["OLD_SECONDARY"] = inst.secondary_nodes[0]
      hook_env["NEW_SECONDARY"] = old_primary

    hook_env.update(_BuildInstanceHookEnvByObject(self, inst))

    return hook_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The first element holds the master and the secondary nodes; the
    second one additionally ends with the primary node.

    """
    inst = self._migrater.instance
    pre_nodes = [self.cfg.GetMasterNode()]
    pre_nodes.extend(inst.secondary_nodes)
    post_nodes = pre_nodes + [inst.primary_node]
    return (pre_nodes, post_nodes)
6064
6065
class LUInstanceMigrate(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown. The work itself is done by a
  L{TLMigrateInstance} tasklet created with C{failover=False}.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and create the migration tasklet.

    The node lock list is left empty here and filled in by
    L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()

    requested_target = self.op.target_node
    if requested_target is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, requested_target)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    migrater = TLMigrateInstance(self, self.op.instance_name,
                                 cleanup=self.op.cleanup,
                                 failover=False,
                                 fallback=self.op.allow_failover)
    self._migrater = migrater
    self.tasklets = [migrater]

  def DeclareLocks(self, level):
    """Declare the node locks.

    For externally mirrored disk templates either all nodes or the
    primary and the requested target node are locked, and the lock
    recalculation entry is dropped. Otherwise the instance's own nodes
    are locked.

    """
    if level != locking.LEVEL_NODE:
      return

    inst = self.context.cfg.GetInstanceInfo(self.op.instance_name)
    if inst.disk_template not in constants.DTS_EXT_MIRROR:
      self._LockInstancesNodes()
      return

    if self.op.target_node is None:
      wanted = locking.ALL_SET
    else:
      wanted = [inst.primary_node, self.op.target_node]
    self.needed_locks[locking.LEVEL_NODE] = wanted
    del self.recalculate_locks[locking.LEVEL_NODE]

  def BuildHooksEnv(self):
    """Build hooks env.

    The standard instance environment is extended with the migration
    specific variables. C{NEW_PRIMARY} is taken from the opcode's
    target node.

    """
    # NOTE(review): OLD_SECONDARY comes from the opcode's target node
    # here, while LUInstanceFailover reports instance.secondary_nodes[0]
    # (and uses "" instead of None when there is no secondary) - confirm
    # whether the difference is intended
    inst = self._migrater.instance
    old_primary = inst.primary_node
    new_primary = self.op.target_node

    hook_env = _BuildInstanceHookEnvByObject(self, inst)
    migration_env = {
      "MIGRATE_LIVE": self._migrater.live,
      "MIGRATE_CLEANUP": self.op.cleanup,
      "OLD_PRIMARY": old_primary,
      "NEW_PRIMARY": new_primary,
      }
    hook_env.update(migration_env)

    if inst.disk_template not in constants.DTS_INT_MIRROR:
      hook_env["OLD_SECONDARY"] = hook_env["NEW_SECONDARY"] = None
    else:
      hook_env["OLD_SECONDARY"] = new_primary
      hook_env["NEW_SECONDARY"] = old_primary

    return hook_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The first element holds the master and the secondary nodes; the
    second one additionally ends with the primary node.

    """
    inst = self._migrater.instance
    pre_nodes = [self.cfg.GetMasterNode()]
    pre_nodes.extend(inst.secondary_nodes)
    post_nodes = pre_nodes + [inst.primary_node]
    return (pre_nodes, post_nodes)
6137
6138
class LUInstanceMove(LogicalUnit):
  """Move an instance by data-copying.

  The instance is shut down, its disks are created on the target node
  and filled by exporting the data from the source node; afterwards the
  source disks are removed.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and the target node.

    The instance's primary node is appended to the node locks in
    L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    """Add the instance's primary node to the node locks.

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary node of the instance and the
    target node (see L{BuildHooksNodes}).

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The same list (master, primary node, target node) is used for both
    elements of the returned tuple.

    """
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      self.op.target_node,
      ]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that the target
    node differs from the current primary node, that all disks are
    plain logical volumes or files, and that the target node is online,
    not drained and VM-capable. Memory on the target node is only
    checked if the instance is marked up.

    @raise errors.OpPrereqError: if the instance already lives on the
        target node or one of its disks cannot be copied

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    _CheckNodeVmCapable(self, target_node)

    if instance.admin_up:
      # check memory requirements on the target node; the reason text
      # names the operation actually being performed (a move)
      _CheckNodeFreeMemory(self, target_node, "moving instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the target node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    @raise errors.OpExecError: if the shutdown fails (and consistency
        is not ignored), if the disks cannot be created or copied, or
        if the instance cannot be started on the target node

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        # Release the DRBD minors even if the cleanup itself fails, then
        # re-raise
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True, idx)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        # The copy error is what gets reported, whatever happens while
        # removing the partially copied disks
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    # The data is on the target node now, switch the primary node
    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
6321
6322
class LUNodeMigrate(LogicalUnit):
  """Migrate all instances from a node.

  One L{TLMigrateInstance} tasklet is created for every instance
  returned by L{_GetNodePrimaryInstances} for the node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the C{iallocator} and C{remote_node} opcode parameters.

    """
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    """Expand the node name, create the tasklets and declare the locks.

    If at least one instance uses an externally mirrored disk template,
    all nodes are locked; otherwise the node itself is locked and the
    instances' nodes are appended in L{DeclareLocks}.

    """
    node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    self.op.node_name = node_name

    self.needed_locks = {}
    self.lock_all_nodes = False

    # One migration tasklet per instance having this node as primary
    instance_names = []
    migrations = []
    for instance in _GetNodePrimaryInstances(self.cfg, node_name):
      logging.debug("Migrating instance %s", instance.name)
      instance_names.append(instance.name)

      migrations.append(TLMigrateInstance(self, instance.name,
                                          cleanup=False))

      if instance.disk_template in constants.DTS_EXT_MIRROR:
        # We need to lock all nodes, as the iallocator will choose the
        # destination nodes afterwards
        self.lock_all_nodes = True

    self.tasklets = migrations

    # Node locks
    if not self.lock_all_nodes:
      self.needed_locks[locking.LEVEL_NODE] = [self.op.node_name]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

    # Instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = instance_names

  def DeclareLocks(self, level):
    """Append the instances' nodes unless all nodes are locked anyway.

    """
    if level != locking.LEVEL_NODE:
      return
    if self.lock_all_nodes:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    Only C{NODE_NAME} is exported.

    """
    hook_env = {}
    hook_env["NODE_NAME"] = self.op.node_name
    return hook_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    Only the master node is returned, for both elements of the tuple.

    """
    master_only = [self.cfg.GetMasterNode()]
    return (master_only, master_only)
6388
6389
6390 class TLMigrateInstance(Tasklet):
6391   """Tasklet class for instance migration.
6392
6393   @type live: boolean
6394   @ivar live: whether the migration will be done live or non-live;
6395       this variable is initialized only after CheckPrereq has run
6396   @type cleanup: boolean
6397   @ivar cleanup: Whether we cleanup from a failed migration
6398   @type iallocator: string
6399   @ivar iallocator: The iallocator used to determine target_node
6400   @type target_node: string
6401   @ivar target_node: If given, the target_node to reallocate the instance to
6402   @type failover: boolean
6403   @ivar failover: Whether operation results in failover or migration
6404   @type fallback: boolean
6405   @ivar fallback: Whether fallback to failover is allowed if migration not
6406                   possible
6407   @type ignore_consistency: boolean
6408   @ivar ignore_consistency: Whether we should ignore consistency between source
6409                             and target node
6410   @type shutdown_timeout: int
6411   @ivar shutdown_timeout: In case of failover timeout of the shutdown
6412
6413   """
6414   def __init__(self, lu, instance_name, cleanup=False,
6415                failover=False, fallback=False,
6416                ignore_consistency=False,
6417                shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
6418     """Initializes this class.
6419
6420     """
6421     Tasklet.__init__(self, lu)
6422
6423     # Parameters
6424     self.instance_name = instance_name
6425     self.cleanup = cleanup
6426     self.live = False # will be overridden later
6427     self.failover = failover
6428     self.fallback = fallback
6429     self.ignore_consistency = ignore_consistency
6430     self.shutdown_timeout = shutdown_timeout
6431
  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, decides whether the
    operation is a migration or a failover, determines the target node
    and validates it, and finally computes C{self.live}.

    The method may change C{self.failover} (when falling back from a
    migration) and rewrites the C{live} and C{mode} attributes of the
    owning LU's opcode.

    @raise errors.OpPrereqError: if the disk template is not mirrored,
        or if a target node/iallocator is given for a template that
        only allows the secondary node, or if both C{live} and C{mode}
        are given
    @raise errors.ConfigurationError: if an internally mirrored
        instance has no secondary node

    """
    instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
    instance = self.cfg.GetInstanceInfo(instance_name)
    assert instance is not None
    self.instance = instance

    # A stopped instance cannot be migrated; if the caller allowed it,
    # turn the request into a failover right away
    if (not self.cleanup and not instance.admin_up and not self.failover and
        self.fallback):
      self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
                      " to failover")
      self.failover = True

    if instance.disk_template not in constants.DTS_MIRRORED:
      if self.failover:
        text = "failovers"
      else:
        text = "migrations"
      raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
                                 " %s" % (instance.disk_template, text),
                                 errors.ECODE_STATE)

    if instance.disk_template in constants.DTS_EXT_MIRROR:
      # Externally mirrored: the target comes from the opcode, either
      # directly or through the iallocator
      # NOTE(review): the iallocator/target_node attributes are read from
      # the owning LU's opcode - confirm every LU using this tasklet
      # defines them
      _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")

      if self.lu.op.iallocator:
        self._RunAllocator()
      else:
        # We set self.target_node as it is required by
        # BuildHooksEnv
        self.target_node = self.lu.op.target_node

      # self.target_node is already populated, either directly or by the
      # iallocator run
      target_node = self.target_node

      if len(self.lu.tasklets) == 1:
        # It is safe to release locks only when we're the only tasklet
        # in the LU
        _ReleaseLocks(self.lu, locking.LEVEL_NODE,
                      keep=[instance.primary_node, self.target_node])

    else:
      # Internally mirrored: the only possible target is the first
      # secondary node
      secondary_nodes = instance.secondary_nodes
      if not secondary_nodes:
        raise errors.ConfigurationError("No secondary node but using"
                                        " %s disk template" %
                                        instance.disk_template)
      target_node = secondary_nodes[0]
      if self.lu.op.iallocator or (self.lu.op.target_node and
                                   self.lu.op.target_node != target_node):
        if self.failover:
          text = "failed over"
        else:
          text = "migrated"
        raise errors.OpPrereqError("Instances with disk template %s cannot"
                                   " be %s to arbitrary nodes"
                                   " (neither an iallocator nor a target"
                                   " node can be passed)" %
                                   (instance.disk_template, text),
                                   errors.ECODE_INVAL)

    i_be = self.cfg.GetClusterInfo().FillBE(instance)

    # check memory requirements on the target node; skipped only for a
    # failover of an instance that is marked down
    if not self.failover or instance.admin_up:
      _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
                           instance.name, i_be[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.lu.LogInfo("Not checking memory on the secondary node as"
                      " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self.lu, instance, node=target_node)

    if not self.cleanup:
      _CheckNodeNotDrained(self.lu, target_node)
      if not self.failover:
        # Ask the primary node whether the instance can be migrated
        result = self.rpc.call_instance_migratable(instance.primary_node,
                                                   instance)
        if result.fail_msg and self.fallback:
          self.lu.LogInfo("Can't migrate, instance offline, fallback to"
                          " failover")
          self.failover = True
        else:
          result.Raise("Can't migrate, please use failover",
                       prereq=True, ecode=errors.ECODE_STATE)

    assert not (self.failover and self.cleanup)

    if not self.failover:
      # Resolve the migration mode: the legacy boolean 'live' and the
      # newer 'mode' parameter are mutually exclusive
      if self.lu.op.live is not None and self.lu.op.mode is not None:
        raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
                                   " parameters are accepted",
                                   errors.ECODE_INVAL)
      if self.lu.op.live is not None:
        if self.lu.op.live:
          self.lu.op.mode = constants.HT_MIGRATION_LIVE
        else:
          self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
        # reset the 'live' parameter to None so that repeated
        # invocations of CheckPrereq do not raise an exception
        self.lu.op.live = None
      elif self.lu.op.mode is None:
        # read the default value from the hypervisor
        i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
                                                skip_globals=False)
        self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]

      self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
    else:
      # Failover is never live
      self.live = False
6550
6551   def _RunAllocator(self):
6552     """Run the allocator based on input opcode.
6553
6554     """
6555     ial = IAllocator(self.cfg, self.rpc,
6556                      mode=constants.IALLOCATOR_MODE_RELOC,
6557                      name=self.instance_name,
6558                      # TODO See why hail breaks with a single node below
6559                      relocate_from=[self.instance.primary_node,
6560                                     self.instance.primary_node],
6561                      )
6562
6563     ial.Run(self.lu.op.iallocator)
6564
6565     if not ial.success:
6566       raise errors.OpPrereqError("Can't compute nodes using"
6567                                  " iallocator '%s': %s" %
6568                                  (self.lu.op.iallocator, ial.info),
6569                                  errors.ECODE_NORES)
6570     if len(ial.result) != ial.required_nodes:
6571       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6572                                  " of nodes (%s), required %s" %
6573                                  (self.lu.op.iallocator, len(ial.result),
6574                                   ial.required_nodes), errors.ECODE_FAULT)
6575     self.target_node = ial.result[0]
6576     self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6577                  self.instance_name, self.lu.op.iallocator,
6578                  utils.CommaJoin(ial.result))
6579
6580   def _WaitUntilSync(self):
6581     """Poll with custom rpc for disk sync.
6582
6583     This uses our own step-based rpc call.
6584
6585     """
6586     self.feedback_fn("* wait until resync is done")
6587     all_done = False
6588     while not all_done:
6589       all_done = True
6590       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6591                                             self.nodes_ip,
6592                                             self.instance.disks)
6593       min_percent = 100
6594       for node, nres in result.items():
6595         nres.Raise("Cannot resync disks on node %s" % node)
6596         node_done, node_percent = nres.payload
6597         all_done = all_done and node_done
6598         if node_percent is not None:
6599           min_percent = min(min_percent, node_percent)
6600       if not all_done:
6601         if min_percent < 100:
6602           self.feedback_fn("   - progress: %.1f%%" % min_percent)
6603         time.sleep(2)
6604
6605   def _EnsureSecondary(self, node):
6606     """Demote a node to secondary.
6607
6608     """
6609     self.feedback_fn("* switching node %s to secondary mode" % node)
6610
6611     for dev in self.instance.disks:
6612       self.cfg.SetDiskID(dev, node)
6613
6614     result = self.rpc.call_blockdev_close(node, self.instance.name,
6615                                           self.instance.disks)
6616     result.Raise("Cannot change disk to secondary on node %s" % node)
6617
6618   def _GoStandalone(self):
6619     """Disconnect from the network.
6620
6621     """
6622     self.feedback_fn("* changing into standalone mode")
6623     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6624                                                self.instance.disks)
6625     for node, nres in result.items():
6626       nres.Raise("Cannot disconnect disks node %s" % node)
6627
6628   def _GoReconnect(self, multimaster):
6629     """Reconnect to the network.
6630
6631     """
6632     if multimaster:
6633       msg = "dual-master"
6634     else:
6635       msg = "single-master"
6636     self.feedback_fn("* changing disks into %s mode" % msg)
6637     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6638                                            self.instance.disks,
6639                                            self.instance.name, multimaster)
6640     for node, nres in result.items():
6641       nres.Raise("Cannot change disks config on node %s" % node)
6642
6643   def _ExecCleanup(self):
6644     """Try to cleanup after a failed migration.
6645
6646     The cleanup is done by:
6647       - check that the instance is running only on one node
6648         (and update the config if needed)
6649       - change disks on its secondary node to secondary
6650       - wait until disks are fully synchronized
6651       - disconnect from the network
6652       - change disks into single-master mode
6653       - wait again until disks are fully synchronized
6654
6655     """
6656     instance = self.instance
6657     target_node = self.target_node
6658     source_node = self.source_node
6659
6660     # check running on only one node
6661     self.feedback_fn("* checking where the instance actually runs"
6662                      " (if this hangs, the hypervisor might be in"
6663                      " a bad state)")
6664     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6665     for node, result in ins_l.items():
6666       result.Raise("Can't contact node %s" % node)
6667
6668     runningon_source = instance.name in ins_l[source_node].payload
6669     runningon_target = instance.name in ins_l[target_node].payload
6670
6671     if runningon_source and runningon_target:
6672       raise errors.OpExecError("Instance seems to be running on two nodes,"
6673                                " or the hypervisor is confused; you will have"
6674                                " to ensure manually that it runs only on one"
6675                                " and restart this operation")
6676
6677     if not (runningon_source or runningon_target):
6678       raise errors.OpExecError("Instance does not seem to be running at all;"
6679                                " in this case it's safer to repair by"
6680                                " running 'gnt-instance stop' to ensure disk"
6681                                " shutdown, and then restarting it")
6682
6683     if runningon_target:
6684       # the migration has actually succeeded, we need to update the config
6685       self.feedback_fn("* instance running on secondary node (%s),"
6686                        " updating config" % target_node)
6687       instance.primary_node = target_node
6688       self.cfg.Update(instance, self.feedback_fn)
6689       demoted_node = source_node
6690     else:
6691       self.feedback_fn("* instance confirmed to be running on its"
6692                        " primary node (%s)" % source_node)
6693       demoted_node = target_node
6694
6695     if instance.disk_template in constants.DTS_INT_MIRROR:
6696       self._EnsureSecondary(demoted_node)
6697       try:
6698         self._WaitUntilSync()
6699       except errors.OpExecError:
6700         # we ignore here errors, since if the device is standalone, it
6701         # won't be able to sync
6702         pass
6703       self._GoStandalone()
6704       self._GoReconnect(False)
6705       self._WaitUntilSync()
6706
6707     self.feedback_fn("* done")
6708
6709   def _RevertDiskStatus(self):
6710     """Try to revert the disk status after a failed migration.
6711
6712     """
6713     target_node = self.target_node
6714     if self.instance.disk_template in constants.DTS_EXT_MIRROR:
6715       return
6716
6717     try:
6718       self._EnsureSecondary(target_node)
6719       self._GoStandalone()
6720       self._GoReconnect(False)
6721       self._WaitUntilSync()
6722     except errors.OpExecError, err:
6723       self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
6724                          " please try to recover the instance manually;"
6725                          " error '%s'" % str(err))
6726
6727   def _AbortMigration(self):
6728     """Call the hypervisor code to abort a started migration.
6729
6730     """
6731     instance = self.instance
6732     target_node = self.target_node
6733     migration_info = self.migration_info
6734
6735     abort_result = self.rpc.call_finalize_migration(target_node,
6736                                                     instance,
6737                                                     migration_info,
6738                                                     False)
6739     abort_msg = abort_result.fail_msg
6740     if abort_msg:
6741       logging.error("Aborting migration failed on target node %s: %s",
6742                     target_node, abort_msg)
6743       # Don't raise an exception here, as we stil have to try to revert the
6744       # disk status, even if this step failed.
6745
6746   def _ExecMigration(self):
6747     """Migrate an instance.
6748
6749     The migrate is done by:
6750       - change the disks into dual-master mode
6751       - wait until disks are fully synchronized again
6752       - migrate the instance
6753       - change disks on the new secondary node (the old primary) to secondary
6754       - wait until disks are fully synchronized
6755       - change disks into single-master mode
6756
6757     """
6758     instance = self.instance
6759     target_node = self.target_node
6760     source_node = self.source_node
6761
6762     self.feedback_fn("* checking disk consistency between source and target")
6763     for dev in instance.disks:
6764       if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6765         raise errors.OpExecError("Disk %s is degraded or not fully"
6766                                  " synchronized on target node,"
6767                                  " aborting migration" % dev.iv_name)
6768
6769     # First get the migration information from the remote node
6770     result = self.rpc.call_migration_info(source_node, instance)
6771     msg = result.fail_msg
6772     if msg:
6773       log_err = ("Failed fetching source migration information from %s: %s" %
6774                  (source_node, msg))
6775       logging.error(log_err)
6776       raise errors.OpExecError(log_err)
6777
6778     self.migration_info = migration_info = result.payload
6779
6780     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
6781       # Then switch the disks to master/master mode
6782       self._EnsureSecondary(target_node)
6783       self._GoStandalone()
6784       self._GoReconnect(True)
6785       self._WaitUntilSync()
6786
6787     self.feedback_fn("* preparing %s to accept the instance" % target_node)
6788     result = self.rpc.call_accept_instance(target_node,
6789                                            instance,
6790                                            migration_info,
6791                                            self.nodes_ip[target_node])
6792
6793     msg = result.fail_msg
6794     if msg:
6795       logging.error("Instance pre-migration failed, trying to revert"
6796                     " disk status: %s", msg)
6797       self.feedback_fn("Pre-migration failed, aborting")
6798       self._AbortMigration()
6799       self._RevertDiskStatus()
6800       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6801                                (instance.name, msg))
6802
6803     self.feedback_fn("* migrating instance to %s" % target_node)
6804     result = self.rpc.call_instance_migrate(source_node, instance,
6805                                             self.nodes_ip[target_node],
6806                                             self.live)
6807     msg = result.fail_msg
6808     if msg:
6809       logging.error("Instance migration failed, trying to revert"
6810                     " disk status: %s", msg)
6811       self.feedback_fn("Migration failed, aborting")
6812       self._AbortMigration()
6813       self._RevertDiskStatus()
6814       raise errors.OpExecError("Could not migrate instance %s: %s" %
6815                                (instance.name, msg))
6816
6817     instance.primary_node = target_node
6818     # distribute new instance config to the other nodes
6819     self.cfg.Update(instance, self.feedback_fn)
6820
6821     result = self.rpc.call_finalize_migration(target_node,
6822                                               instance,
6823                                               migration_info,
6824                                               True)
6825     msg = result.fail_msg
6826     if msg:
6827       logging.error("Instance migration succeeded, but finalization failed:"
6828                     " %s", msg)
6829       raise errors.OpExecError("Could not finalize instance migration: %s" %
6830                                msg)
6831
6832     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
6833       self._EnsureSecondary(source_node)
6834       self._WaitUntilSync()
6835       self._GoStandalone()
6836       self._GoReconnect(False)
6837       self._WaitUntilSync()
6838
6839     self.feedback_fn("* done")
6840
6841   def _ExecFailover(self):
6842     """Failover an instance.
6843
6844     The failover is done by shutting it down on its present node and
6845     starting it on the secondary.
6846
6847     """
6848     instance = self.instance
6849     primary_node = self.cfg.GetNodeInfo(instance.primary_node)
6850
6851     source_node = instance.primary_node
6852     target_node = self.target_node
6853
6854     if instance.admin_up:
6855       self.feedback_fn("* checking disk consistency between source and target")
6856       for dev in instance.disks:
6857         # for drbd, these are drbd over lvm
6858         if not _CheckDiskConsistency(self, dev, target_node, False):
6859           if not self.ignore_consistency:
6860             raise errors.OpExecError("Disk %s is degraded on target node,"
6861                                      " aborting failover" % dev.iv_name)
6862     else:
6863       self.feedback_fn("* not checking disk consistency as instance is not"
6864                        " running")
6865
6866     self.feedback_fn("* shutting down instance on source node")
6867     logging.info("Shutting down instance %s on node %s",
6868                  instance.name, source_node)
6869
6870     result = self.rpc.call_instance_shutdown(source_node, instance,
6871                                              self.shutdown_timeout)
6872     msg = result.fail_msg
6873     if msg:
6874       if self.ignore_consistency or primary_node.offline:
6875         self.lu.LogWarning("Could not shutdown instance %s on node %s,"
6876                            " proceeding anyway; please make sure node"
6877                            " %s is down; error details: %s",
6878                            instance.name, source_node, source_node, msg)
6879       else:
6880         raise errors.OpExecError("Could not shutdown instance %s on"
6881                                  " node %s: %s" %
6882                                  (instance.name, source_node, msg))
6883
6884     self.feedback_fn("* deactivating the instance's disks on source node")
6885     if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
6886       raise errors.OpExecError("Can't shut down the instance's disks.")
6887
6888     instance.primary_node = target_node
6889     # distribute new instance config to the other nodes
6890     self.cfg.Update(instance, self.feedback_fn)
6891
6892     # Only start the instance if it's marked as up
6893     if instance.admin_up:
6894       self.feedback_fn("* activating the instance's disks on target node")
6895       logging.info("Starting instance %s on node %s",
6896                    instance.name, target_node)
6897
6898       disks_ok, _ = _AssembleInstanceDisks(self, instance,
6899                                            ignore_secondaries=True)
6900       if not disks_ok:
6901         _ShutdownInstanceDisks(self, instance)
6902         raise errors.OpExecError("Can't activate the instance's disks")
6903
6904       self.feedback_fn("* starting the instance on the target node")
6905       result = self.rpc.call_instance_start(target_node, instance, None, None)
6906       msg = result.fail_msg
6907       if msg:
6908         _ShutdownInstanceDisks(self, instance)
6909         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6910                                  (instance.name, target_node, msg))
6911
6912   def Exec(self, feedback_fn):
6913     """Perform the migration.
6914
6915     """
6916     self.feedback_fn = feedback_fn
6917     self.source_node = self.instance.primary_node
6918
6919     # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
6920     if self.instance.disk_template in constants.DTS_INT_MIRROR:
6921       self.target_node = self.instance.secondary_nodes[0]
6922       # Otherwise self.target_node has been populated either
6923       # directly, or through an iallocator.
6924
6925     self.all_nodes = [self.source_node, self.target_node]
6926     self.nodes_ip = {
6927       self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6928       self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6929       }
6930
6931     if self.failover:
6932       feedback_fn("Failover instance %s" % self.instance.name)
6933       self._ExecFailover()
6934     else:
6935       feedback_fn("Migrating instance %s" % self.instance.name)
6936
6937       if self.cleanup:
6938         return self._ExecCleanup()
6939       else:
6940         return self._ExecMigration()
6941
6942
def _CreateBlockDev(lu, node, instance, device, force_create,
                    info, force_open):
  """Create a block device and its children on a given node.

  The children are always visited first, recursively. The device itself
  is only created if creation is forced, either by the caller or
  because the device reports that it has to be created on secondaries;
  that decision is inherited by all the children.

  @param lu: the lu on whose behalf we execute
  @param node: the node on which to create the device
  @type instance: L{objects.Instance}
  @param instance: the instance which owns the device
  @type device: L{objects.Disk}
  @param device: the device to create
  @type force_create: boolean
  @param force_create: whether to force creation of this device; this
      is switched to True as soon as a device is found for which
      CreateOnSecondary() is true
  @param info: the extra 'metadata' we should attach to the device
      (this will be represented as a LVM tag)
  @type force_open: boolean
  @param force_open: this parameter will be passed to the
      L{backend.BlockdevCreate} function where it specifies
      whether we run on primary or not, and it affects both
      the child assembly and the device own Open() execution

  """
  must_create = force_create
  if device.CreateOnSecondary():
    must_create = True

  # children have to exist before their parent can be created
  for child in (device.children or ()):
    _CreateBlockDev(lu, node, instance, child, must_create,
                    info, force_open)

  if must_create:
    _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6983
6984
6985 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6986   """Create a single block device on a given node.
6987
6988   This will not recurse over children of the device, so they must be
6989   created in advance.
6990
6991   @param lu: the lu on whose behalf we execute
6992   @param node: the node on which to create the device
6993   @type instance: L{objects.Instance}
6994   @param instance: the instance which owns the device
6995   @type device: L{objects.Disk}
6996   @param device: the device to create
6997   @param info: the extra 'metadata' we should attach to the device
6998       (this will be represented as a LVM tag)
6999   @type force_open: boolean
7000   @param force_open: this parameter will be passes to the
7001       L{backend.BlockdevCreate} function where it specifies
7002       whether we run on primary or not, and it affects both
7003       the child assembly and the device own Open() execution
7004
7005   """
7006   lu.cfg.SetDiskID(device, node)
7007   result = lu.rpc.call_blockdev_create(node, device, device.size,
7008                                        instance.name, force_open, info)
7009   result.Raise("Can't create block device %s on"
7010                " node %s for instance %s" % (device, node, instance.name))
7011   if device.physical_id is None:
7012     device.physical_id = result.payload
7013
7014
7015 def _GenerateUniqueNames(lu, exts):
7016   """Generate a suitable LV name.
7017
7018   This will generate a logical volume name for the given instance.
7019
7020   """
7021   results = []
7022   for val in exts:
7023     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7024     results.append("%s%s" % (new_id, val))
7025   return results
7026
7027
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor):
  """Build a DRBD8 disk object together with its two LV children.

  The first volume group/name pair is used for the data volume (of the
  requested size), the second one for the metadata volume (size 128).
  A port and a shared secret are allocated from the configuration.

  @return: the DRBD8 L{objects.Disk}, having the two LVs as children

  """
  assert len(vgnames) == len(names) == 2
  drbd_port = lu.cfg.AllocatePort()
  secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())

  data_lv = objects.Disk(dev_type=constants.LD_LV, size=size,
                         logical_id=(vgnames[0], names[0]))
  meta_lv = objects.Disk(dev_type=constants.LD_LV, size=128,
                         logical_id=(vgnames[1], names[1]))

  drbd_id = (primary, secondary, drbd_port, p_minor, s_minor, secret)
  return objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                      logical_id=drbd_id,
                      children=[data_lv, meta_lv],
                      iv_name=iv_name)
7047
7048
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index, feedback_fn):
  """Compute the list of disk objects for a given disk template.

  One L{objects.Disk} is generated per entry of C{disk_info}; the disks
  are named C{disk/N}, with N starting at C{base_index}.

  @return: the list of generated disks (empty for the diskless template)
  @raise errors.ProgrammerError: if the template is unknown or the number
      of secondary nodes does not match the template (exactly one for
      DRBD8, none for plain, file, shared file and block)

  """
  #TODO: compute space requirements

  default_vg = lu.cfg.GetVGName()
  num_disks = len(disk_info)

  def _FileBasedDisks():
    """Build the disks of the (shared) file templates."""
    return [objects.Disk(dev_type=constants.LD_FILE,
                         size=params[constants.IDISK_SIZE],
                         iv_name="disk/%d" % (pos + base_index),
                         logical_id=(file_driver,
                                     "%s/disk%d" % (file_storage_dir,
                                                    pos + base_index)),
                         mode=params[constants.IDISK_MODE])
            for pos, params in enumerate(disk_info)]

  if template_name == constants.DT_DISKLESS:
    return []

  if template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    lv_names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                         for i in range(num_disks)])
    layout = []
    for pos, params in enumerate(disk_info):
      vg = params.get(constants.IDISK_VG, default_vg)
      feedback_fn("* disk %i, vg %s, name %s" % (pos, vg, lv_names[pos]))
      layout.append(objects.Disk(dev_type=constants.LD_LV,
                                 size=params[constants.IDISK_SIZE],
                                 logical_id=(vg, lv_names[pos]),
                                 iv_name="disk/%d" % (pos + base_index),
                                 mode=params[constants.IDISK_MODE]))
    return layout

  if template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    # two minors per disk: one on the primary, one on the secondary
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    # two LV names per disk: data first, metadata second
    lv_names = []
    for prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                            for i in range(num_disks)]):
      lv_names.extend([prefix + "_data", prefix + "_meta"])

    layout = []
    for pos, params in enumerate(disk_info):
      first = pos * 2
      data_vg = params.get(constants.IDISK_VG, default_vg)
      # the metadata volume defaults to the VG of the data volume
      meta_vg = params.get(constants.IDISK_METAVG, data_vg)
      drbd = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                  params[constants.IDISK_SIZE],
                                  [data_vg, meta_vg],
                                  lv_names[first:first + 2],
                                  "disk/%d" % (pos + base_index),
                                  minors[first], minors[first + 1])
      drbd.mode = params[constants.IDISK_MODE]
      layout.append(drbd)
    return layout

  if template_name == constants.DT_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    opcodes.RequireFileStorage()
    return _FileBasedDisks()

  if template_name == constants.DT_SHARED_FILE:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    opcodes.RequireSharedFileStorage()
    return _FileBasedDisks()

  if template_name == constants.DT_BLOCK:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    return [objects.Disk(dev_type=constants.LD_BLOCKDEV,
                         size=params[constants.IDISK_SIZE],
                         logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
                                     params[constants.IDISK_ADOPT]),
                         iv_name="disk/%d" % (pos + base_index),
                         mode=params[constants.IDISK_MODE])
            for pos, params in enumerate(disk_info)]

  raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7153
7154
7155 def _GetInstanceInfoText(instance):
7156   """Compute that text that should be added to the disk's metadata.
7157
7158   """
7159   return "originstname+%s" % instance.name
7160
7161
7162 def _CalcEta(time_taken, written, total_size):
7163   """Calculates the ETA based on size written and total size.
7164
7165   @param time_taken: The time taken so far
7166   @param written: amount written so far
7167   @param total_size: The total size of data to be written
7168   @return: The remaining time in seconds
7169
7170   """
7171   avg_time = time_taken / float(written)
7172   return (total_size - written) * avg_time
7173
7174
def _WipeDisks(lu, instance):
  """Wipes instance disks.

  The disk synchronization is paused on the primary node, then every
  disk is wiped chunk by chunk; the synchronization is resumed in any
  case afterwards, even if the wipe failed.

  Failures of the pause RPC and of the wipe RPCs are reported through
  the C{Raise} method of the respective RPC result. A failure to resume
  the synchronization only results in warnings, so that it cannot hide
  an error raised while wiping.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe
  @return: None

  """
  node = instance.primary_node

  for device in instance.disks:
    lu.cfg.SetDiskID(device, node)

  logging.info("Pause sync of instance %s disks", instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
  # check the RPC itself first: the per-disk payload examined below is
  # only meaningful if the call as a whole succeeded
  result.Raise("Failed RPC to node %s for pausing the disk syncing" % node)

  for idx, success in enumerate(result.payload):
    if not success:
      logging.warning("pause-sync of instance %s for disks %d failed",
                      instance.name, idx)

  try:
    for idx, device in enumerate(instance.disks):
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # MAX_WIPE_CHUNK at max
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                            constants.MIN_WIPE_CHUNK_PERCENT)
      # we _must_ make this an int, otherwise rounding errors will
      # occur; for very small disks the truncation would yield zero,
      # which would keep the loop below from ever advancing, hence the
      # lower bound
      wipe_chunk_size = max(1, int(wipe_chunk_size))

      lu.LogInfo("* Wiping disk %d", idx)
      logging.info("Wiping disk %d for instance %s, node %s using"
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)

      offset = 0
      size = device.size
      # starting from zero makes the first chunk always report progress
      last_output = 0
      start_time = time.time()

      while offset < size:
        wipe_size = min(wipe_chunk_size, size - offset)
        logging.debug("Wiping disk %d, offset %s, chunk %s",
                      idx, offset, wipe_size)
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))
        now = time.time()
        offset += wipe_size
        # report the progress at most once per minute
        if now - last_output >= 60:
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
          last_output = now
  finally:
    logging.info("Resume sync of instance %s disks", instance.name)

    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)

    resume_err = result.fail_msg
    if resume_err:
      # only warn: raising from this "finally" block would replace any
      # exception raised while wiping
      lu.LogWarning("Resume sync of the disks failed on node %s (%s),"
                    " please have a look at the status and troubleshoot"
                    " the issue", node, resume_err)
      logging.warning("resume-sync of instance %s failed on node %s: %s",
                      instance.name, node, resume_err)
    else:
      for idx, success in enumerate(result.payload):
        if not success:
          lu.LogWarning("Resume sync of disk %d failed, please have a"
                        " look at the status and troubleshoot the issue", idx)
          logging.warning("resume-sync of instance %s for disks %d failed",
                          instance.name, idx)
7242
7243
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  Helper used when adding an instance: for file-based disk templates the
  storage directory is created first, then every disk of the instance is
  created on each node involved.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: indices of the disks which must not be created; C{None}
      or an empty list means all disks are created
  @type target_node: string
  @param target_node: if passed, the disks are created only on this node,
      which is then treated as the primary node
  @return: nothing; the directory creation result is checked via its
      C{Raise} method and the per-device work is delegated to
      L{_CreateBlockDev}

  """
  info = _GetInstanceInfoText(instance)

  if target_node is not None:
    pnode = target_node
    all_nodes = [target_node]
  else:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes

  file_based = (constants.DT_FILE, constants.DT_SHARED_FILE)
  if instance.disk_template in file_based:
    # the directory is derived from the path of the first disk
    storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    mkdir_result = lu.rpc.call_file_storage_dir_create(pnode, storage_dir)
    mkdir_result.Raise("Failed to create directory '%s' on node %s" %
                       (storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for idx, device in enumerate(instance.disks):
    skip_this = to_skip and idx in to_skip
    if skip_this:
      continue

    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    for node in all_nodes:
      # the same flag is passed twice to _CreateBlockDev (fifth and seventh
      # argument); it is true only for the primary node
      on_primary = (node == pnode)
      _CreateBlockDev(lu, node, instance, device, on_primary, info,
                      on_primary)
7287
7288
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  For instances using the C{constants.DT_FILE} disk template the file
  storage directory (derived from the path of the first disk) is removed
  as well, after the block devices.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: the success of the removal; C{False} if at least one block
      device or the storage directory could not be removed

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      # only the given node is touched, the node tree is not consulted
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        # failures are only logged, so that the remaining devices are
        # still attempted
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  if instance.disk_template == constants.DT_FILE:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    tgt = target_node or instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      # report the node the RPC was actually sent to, which differs from
      # the primary node when target_node was given
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result
7336
7337
def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements per volume group.

  @type disk_template: string
  @param disk_template: the disk template, one of the C{constants.DT_*}
      values handled below
  @type disks: list of dicts
  @param disks: the disk definitions; each must contain the
      C{constants.IDISK_VG} and C{constants.IDISK_SIZE} keys
  @rtype: dict
  @return: mapping of volume group name to the total size required in
      that group (same unit as C{IDISK_SIZE}); empty for templates which
      do not consume volume group space
  @raise errors.ProgrammerError: if the disk template is not known here

  """
  def _compute(disks, payload):
    """Sum the disk sizes per volume group.

    C{payload} is a fixed per-disk overhead added to each disk's size.

    """
    vgs = {}
    for disk in disks:
      vg_name = disk[constants.IDISK_VG]
      # accumulate on the disk's own volume group, so that several disks
      # in the same group add up instead of overwriting each other
      vgs[vg_name] = vgs.get(vg_name, 0) + disk[constants.IDISK_SIZE] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, 128),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
    # kept in line with _ComputeDiskSize, which knows this template too
    constants.DT_BLOCK: {},
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" %  disk_template)

  return req_size_dict[disk_template]
7368
7369
def _ComputeDiskSize(disk_template, disks):
  """Compute the total disk size requirement for a disk template.

  @type disk_template: string
  @param disk_template: the disk template, one of the C{constants.DT_*}
      values handled below
  @type disks: list of dicts
  @param disks: the disk definitions; each must contain the
      C{constants.IDISK_SIZE} key
  @return: the summed size for the plain and drbd8 templates, C{None} for
      the diskless and file templates, C{0} for the shared-file and block
      templates
  @raise errors.ProgrammerError: if the disk template is not known here

  """
  # both totals are computed up-front, whatever template was requested
  plain_total = sum(d[constants.IDISK_SIZE] for d in disks)
  # 128 MB are added for drbd metadata for each disk
  drbd_total = sum(d[constants.IDISK_SIZE] + 128 for d in disks)

  # Required free disk space as a function of disk and swap space
  size_by_template = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: plain_total,
    constants.DT_DRBD8: drbd_total,
    constants.DT_FILE: None,
    constants.DT_SHARED_FILE: 0,
    constants.DT_BLOCK: 0,
  }

  try:
    return size_by_template[disk_template]
  except KeyError:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" % disk_template)
7390
7391
def _FilterVmNodes(lu, nodenames):
  """Drop the nodes which are not vm_capable from a list of node names.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit whose configuration is queried
  @type nodenames: list
  @param nodenames: the node names to filter
  @rtype: list
  @return: the names from C{nodenames}, in their original order, which
      are not reported as non-vm_capable by the configuration

  """
  excluded = frozenset(lu.cfg.GetNonVmCapableNodeList())

  vm_capable = []
  for name in nodenames:
    if name in excluded:
      continue
    vm_capable.append(name)
  return vm_capable
7405
7406
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Validate hypervisor parameters on a set of nodes.

  Shared by instance creation and instance modification. Nodes which are
  not vm_capable are not contacted, and results from offline nodes are
  ignored.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  vm_nodes = _FilterVmNodes(lu, nodenames)
  per_node = lu.rpc.call_hypervisor_validate_params(vm_nodes, hvname,
                                                    hvparams)
  for node in vm_nodes:
    node_result = per_node[node]
    if not node_result.offline:
      node_result.Raise("Hypervisor parameter validation failed on node %s" %
                        node)
7433
7434
def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """Validate OS parameters on a set of nodes.

  Nodes which are not vm_capable are not contacted. A node returning an
  empty payload is only reported via an informational message.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS whose parameters are validated
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  vm_nodes = _FilterVmNodes(lu, nodenames)
  checks = [constants.OS_VALIDATE_PARAMETERS]
  per_node = lu.rpc.call_os_validate(required, vm_nodes, osname, checks,
                                     osparams)
  for node, node_result in per_node.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    node_result.Raise("OS Parameters validation failed on node %s" % node)
    if node_result.payload:
      continue
    lu.LogInfo("OS %s not found on node %s, validation skipped",
               osname, node)
7463
7464
class LUInstanceCreate(LogicalUnit):
  """Create an instance.

  The creation mode is read from the opcode's C{mode} field; the modes
  accepted by L{CheckArguments} are C{constants.INSTANCE_CREATE},
  C{constants.INSTANCE_IMPORT} and C{constants.INSTANCE_REMOTE_IMPORT}.

  """
  # NOTE(review): presumably the hooks path and hooks type consumed by the
  # LogicalUnit framework -- confirm against the base class
  HPATH = "instance-add"
  HTYPE = constants.HTYPE_INSTANCE
  # NOTE(review): presumably tells the framework that no cluster-wide lock
  # is needed; locks are declared in ExpandNames -- confirm
  REQ_BGL = False
7472
  def CheckArguments(self):
    """Check and normalize the opcode arguments.

    The checks done here only look at the opcode and at cluster-wide
    data; the first failing check aborts the method. Besides validating,
    the method has the following effects:

      - C{self.op.start} is disabled when C{no_install} is set
      - C{self.op.instance_name} is normalized (and replaced by the
        resolved name when C{name_check} is set)
      - C{self.op.snode} is reset for non-mirrored disk templates
      - C{self.op.force_variant} is forced on for imports
      - C{self.adopt_disks}, C{self.check_ip} and C{self._cds} are set;
        C{self.hostname1} is set only when C{name_check} is enabled
      - for remote imports, C{self.source_x509_ca_pem},
        C{self.source_x509_ca} and C{self.source_instance_name} are set

    @raise errors.OpPrereqError: if any of the arguments is invalid

    """
    # do not require name_check to ease forward/backward compatibility
    # for tools
    if self.op.no_install and self.op.start:
      self.LogInfo("No-installation mode selected, disabling startup")
      self.op.start = False
    # validate/normalize the instance name
    self.op.instance_name = \
      netutils.Hostname.GetNormalizedName(self.op.instance_name)

    if self.op.ip_check and not self.op.name_check:
      # TODO: make the ip check more flexible and not depend on the name check
      raise errors.OpPrereqError("Cannot do IP address check without a name"
                                 " check", errors.ECODE_INVAL)

    # check nics' parameter names
    for nic in self.op.nics:
      utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)

    # check disks. parameter names and consistent adopt/no-adopt strategy
    has_adopt = has_no_adopt = False
    for disk in self.op.disks:
      utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
      if constants.IDISK_ADOPT in disk:
        has_adopt = True
      else:
        has_no_adopt = True
    if has_adopt and has_no_adopt:
      raise errors.OpPrereqError("Either all disks are adopted or none is",
                                 errors.ECODE_INVAL)
    if has_adopt:
      # adoption is restricted to some templates and excludes both the
      # iallocator and the (local) import mode
      if self.op.disk_template not in constants.DTS_MAY_ADOPT:
        raise errors.OpPrereqError("Disk adoption is not supported for the"
                                   " '%s' disk template" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)
      if self.op.iallocator is not None:
        raise errors.OpPrereqError("Disk adoption not allowed with an"
                                   " iallocator script", errors.ECODE_INVAL)
      if self.op.mode == constants.INSTANCE_IMPORT:
        raise errors.OpPrereqError("Disk adoption not allowed for"
                                   " instance import", errors.ECODE_INVAL)
    else:
      # no disk is adopted (this includes the case of an empty disk list)
      if self.op.disk_template in constants.DTS_MUST_ADOPT:
        raise errors.OpPrereqError("Disk template %s requires disk adoption,"
                                   " but no 'adopt' parameter given" %
                                   self.op.disk_template,
                                   errors.ECODE_INVAL)

    self.adopt_disks = has_adopt

    # instance name verification
    if self.op.name_check:
      self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
      self.op.instance_name = self.hostname1.name
      # used in CheckPrereq for ip ping check
      self.check_ip = self.hostname1.ip
    else:
      self.check_ip = None

    # file storage checks
    if (self.op.file_driver and
        not self.op.file_driver in constants.FILE_DRIVER):
      raise errors.OpPrereqError("Invalid file driver name '%s'" %
                                 self.op.file_driver, errors.ECODE_INVAL)

    # NOTE(review): the condition rejects *absolute* paths while the message
    # says "not absolute"; one of the two looks wrong -- confirm whether a
    # relative directory is what is expected here
    if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
      raise errors.OpPrereqError("File storage directory path not absolute",
                                 errors.ECODE_INVAL)

    ### Node/iallocator related checks
    _CheckIAllocatorOrNode(self, "iallocator", "pnode")

    if self.op.pnode is not None:
      if self.op.disk_template in constants.DTS_INT_MIRROR:
        if self.op.snode is None:
          raise errors.OpPrereqError("The networked disk templates need"
                                     " a mirror node", errors.ECODE_INVAL)
      elif self.op.snode:
        self.LogWarning("Secondary node will be ignored on non-mirrored disk"
                        " template")
        self.op.snode = None

    # the cluster domain secret is needed for the remote import checks below
    self._cds = _GetClusterDomainSecret()

    if self.op.mode == constants.INSTANCE_IMPORT:
      # On import force_variant must be True, because if we forced it at
      # initial install, our only chance when importing it back is that it
      # works again!
      self.op.force_variant = True

      if self.op.no_install:
        self.LogInfo("No-installation mode has no effect during import")

    elif self.op.mode == constants.INSTANCE_CREATE:
      if self.op.os_type is None:
        raise errors.OpPrereqError("No guest OS specified",
                                   errors.ECODE_INVAL)
      if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
        raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
                                   " installation" % self.op.os_type,
                                   errors.ECODE_STATE)
      if self.op.disk_template is None:
        raise errors.OpPrereqError("No disk template specified",
                                   errors.ECODE_INVAL)

    elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
      # Check handshake to ensure both clusters have the same domain secret
      src_handshake = self.op.source_handshake
      if not src_handshake:
        raise errors.OpPrereqError("Missing source handshake",
                                   errors.ECODE_INVAL)

      errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
                                                           src_handshake)
      if errmsg:
        raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
                                   errors.ECODE_INVAL)

      # Load and check source CA
      self.source_x509_ca_pem = self.op.source_x509_ca
      if not self.source_x509_ca_pem:
        raise errors.OpPrereqError("Missing source X509 CA",
                                   errors.ECODE_INVAL)

      try:
        (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
                                                    self._cds)
      except OpenSSL.crypto.Error, err:
        raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
                                   (err, ), errors.ECODE_INVAL)

      # any non-None error code from the verification is treated as fatal
      (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
      if errcode is not None:
        raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
                                   errors.ECODE_INVAL)

      self.source_x509_ca = cert

      src_instance_name = self.op.source_instance_name
      if not src_instance_name:
        raise errors.OpPrereqError("Missing source instance name",
                                   errors.ECODE_INVAL)

      self.source_instance_name = \
          netutils.GetHostname(name=src_instance_name).name

    else:
      raise errors.OpPrereqError("Invalid instance creation mode %r" %
                                 self.op.mode, errors.ECODE_INVAL)
7626
7627   def ExpandNames(self):
7628     """ExpandNames for CreateInstance.
7629
7630     Figure out the right locks for instance creation.
7631
7632     """
7633     self.needed_locks = {}
7634
7635     instance_name = self.op.instance_name
7636     # this is just a preventive check, but someone might still add this
7637     # instance in the meantime, and creation will fail at lock-add time
7638     if instance_name in self.cfg.GetInstanceList():
7639       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7640                                  instance_name, errors.ECODE_EXISTS)
7641
7642     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7643
7644     if self.op.iallocator:
7645       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7646     else:
7647       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7648       nodelist = [self.op.pnode]
7649       if self.op.snode is not None:
7650         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7651         nodelist.append(self.op.snode)
7652       self.needed_locks[locking.LEVEL_NODE] = nodelist
7653
7654     # in case of import lock the source node too
7655     if self.op.mode == constants.INSTANCE_IMPORT:
7656       src_node = self.op.src_node
7657       src_path = self.op.src_path
7658
7659       if src_path is None:
7660         self.op.src_path = src_path = self.op.instance_name
7661
7662       if src_node is None:
7663         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7664         self.op.src_node = None
7665         if os.path.isabs(src_path):
7666           raise errors.OpPrereqError("Importing an instance from an absolute"
7667                                      " path requires a source node option",
7668                                      errors.ECODE_INVAL)
7669       else:
7670         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7671         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7672           self.needed_locks[locking.LEVEL_NODE].append(src_node)
7673         if not os.path.isabs(src_path):
7674           self.op.src_path = src_path = \
7675             utils.PathJoin(constants.EXPORT_DIR, src_path)
7676
7677   def _RunAllocator(self):
7678     """Run the allocator based on input opcode.
7679
7680     """
7681     nics = [n.ToDict() for n in self.nics]
7682     ial = IAllocator(self.cfg, self.rpc,
7683                      mode=constants.IALLOCATOR_MODE_ALLOC,
7684                      name=self.op.instance_name,
7685                      disk_template=self.op.disk_template,
7686                      tags=[],
7687                      os=self.op.os_type,
7688                      vcpus=self.be_full[constants.BE_VCPUS],
7689                      mem_size=self.be_full[constants.BE_MEMORY],
7690                      disks=self.disks,
7691                      nics=nics,
7692                      hypervisor=self.op.hypervisor,
7693                      )
7694
7695     ial.Run(self.op.iallocator)
7696
7697     if not ial.success:
7698       raise errors.OpPrereqError("Can't compute nodes using"
7699                                  " iallocator '%s': %s" %
7700                                  (self.op.iallocator, ial.info),
7701                                  errors.ECODE_NORES)
7702     if len(ial.result) != ial.required_nodes:
7703       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7704                                  " of nodes (%s), required %s" %
7705                                  (self.op.iallocator, len(ial.result),
7706                                   ial.required_nodes), errors.ECODE_FAULT)
7707     self.op.pnode = ial.result[0]
7708     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7709                  self.op.instance_name, self.op.iallocator,
7710                  utils.CommaJoin(ial.result))
7711     if ial.required_nodes == 2:
7712       self.op.snode = ial.result[1]
7713
7714   def BuildHooksEnv(self):
7715     """Build hooks env.
7716
7717     This runs on master, primary and secondary nodes of the instance.
7718
7719     """
7720     env = {
7721       "ADD_MODE": self.op.mode,
7722       }
7723     if self.op.mode == constants.INSTANCE_IMPORT:
7724       env["SRC_NODE"] = self.op.src_node
7725       env["SRC_PATH"] = self.op.src_path
7726       env["SRC_IMAGES"] = self.src_images
7727
7728     env.update(_BuildInstanceHookEnv(
7729       name=self.op.instance_name,
7730       primary_node=self.op.pnode,
7731       secondary_nodes=self.secondaries,
7732       status=self.op.start,
7733       os_type=self.op.os_type,
7734       memory=self.be_full[constants.BE_MEMORY],
7735       vcpus=self.be_full[constants.BE_VCPUS],
7736       nics=_NICListToTuple(self, self.nics),
7737       disk_template=self.op.disk_template,
7738       disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
7739              for d in self.disks],
7740       bep=self.be_full,
7741       hvp=self.hv_full,
7742       hypervisor_name=self.op.hypervisor,
7743     ))
7744
7745     return env
7746
7747   def BuildHooksNodes(self):
7748     """Build hooks nodes.
7749
7750     """
7751     nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
7752     return nl, nl
7753
7754   def _ReadExportInfo(self):
7755     """Reads the export information from disk.
7756
7757     It will override the opcode source node and path with the actual
7758     information, if these two were not specified before.
7759
7760     @return: the export information
7761
7762     """
7763     assert self.op.mode == constants.INSTANCE_IMPORT
7764
7765     src_node = self.op.src_node
7766     src_path = self.op.src_path
7767
7768     if src_node is None:
7769       locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
7770       exp_list = self.rpc.call_export_list(locked_nodes)
7771       found = False
7772       for node in exp_list:
7773         if exp_list[node].fail_msg:
7774           continue
7775         if src_path in exp_list[node].payload:
7776           found = True
7777           self.op.src_node = src_node = node
7778           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7779                                                        src_path)
7780           break
7781       if not found:
7782         raise errors.OpPrereqError("No export found for relative path %s" %
7783                                     src_path, errors.ECODE_INVAL)
7784
7785     _CheckNodeOnline(self, src_node)
7786     result = self.rpc.call_export_info(src_node, src_path)
7787     result.Raise("No export or invalid export found in dir %s" % src_path)
7788
7789     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7790     if not export_info.has_section(constants.INISECT_EXP):
7791       raise errors.ProgrammerError("Corrupted export config",
7792                                    errors.ECODE_ENVIRON)
7793
7794     ei_version = export_info.get(constants.INISECT_EXP, "version")
7795     if (int(ei_version) != constants.EXPORT_VERSION):
7796       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7797                                  (ei_version, constants.EXPORT_VERSION),
7798                                  errors.ECODE_ENVIRON)
7799     return export_info
7800
7801   def _ReadExportParams(self, einfo):
7802     """Use export parameters as defaults.
7803
7804     In case the opcode doesn't specify (as in override) some instance
7805     parameters, then try to use them from the export information, if
7806     that declares them.
7807
7808     """
7809     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7810
7811     if self.op.disk_template is None:
7812       if einfo.has_option(constants.INISECT_INS, "disk_template"):
7813         self.op.disk_template = einfo.get(constants.INISECT_INS,
7814                                           "disk_template")
7815       else:
7816         raise errors.OpPrereqError("No disk template specified and the export"
7817                                    " is missing the disk_template information",
7818                                    errors.ECODE_INVAL)
7819
7820     if not self.op.disks:
7821       if einfo.has_option(constants.INISECT_INS, "disk_count"):
7822         disks = []
7823         # TODO: import the disk iv_name too
7824         for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7825           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7826           disks.append({constants.IDISK_SIZE: disk_sz})
7827         self.op.disks = disks
7828       else:
7829         raise errors.OpPrereqError("No disk info specified and the export"
7830                                    " is missing the disk information",
7831                                    errors.ECODE_INVAL)
7832
7833     if (not self.op.nics and
7834         einfo.has_option(constants.INISECT_INS, "nic_count")):
7835       nics = []
7836       for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7837         ndict = {}
7838         for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7839           v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7840           ndict[name] = v
7841         nics.append(ndict)
7842       self.op.nics = nics
7843
7844     if (self.op.hypervisor is None and
7845         einfo.has_option(constants.INISECT_INS, "hypervisor")):
7846       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7847     if einfo.has_section(constants.INISECT_HYP):
7848       # use the export parameters but do not override the ones
7849       # specified by the user
7850       for name, value in einfo.items(constants.INISECT_HYP):
7851         if name not in self.op.hvparams:
7852           self.op.hvparams[name] = value
7853
7854     if einfo.has_section(constants.INISECT_BEP):
7855       # use the parameters, without overriding
7856       for name, value in einfo.items(constants.INISECT_BEP):
7857         if name not in self.op.beparams:
7858           self.op.beparams[name] = value
7859     else:
7860       # try to read the parameters old style, from the main section
7861       for name in constants.BES_PARAMETERS:
7862         if (name not in self.op.beparams and
7863             einfo.has_option(constants.INISECT_INS, name)):
7864           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7865
7866     if einfo.has_section(constants.INISECT_OSP):
7867       # use the parameters, without overriding
7868       for name, value in einfo.items(constants.INISECT_OSP):
7869         if name not in self.op.osparams:
7870           self.op.osparams[name] = value
7871
7872   def _RevertToDefaults(self, cluster):
7873     """Revert the instance parameters to the default values.
7874
7875     """
7876     # hvparams
7877     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7878     for name in self.op.hvparams.keys():
7879       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7880         del self.op.hvparams[name]
7881     # beparams
7882     be_defs = cluster.SimpleFillBE({})
7883     for name in self.op.beparams.keys():
7884       if name in be_defs and be_defs[name] == self.op.beparams[name]:
7885         del self.op.beparams[name]
7886     # nic params
7887     nic_defs = cluster.SimpleFillNIC({})
7888     for nic in self.op.nics:
7889       for name in constants.NICS_PARAMETERS:
7890         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7891           del nic[name]
7892     # osparams
7893     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7894     for name in self.op.osparams.keys():
7895       if name in os_defs and os_defs[name] == self.op.osparams[name]:
7896         del self.op.osparams[name]
7897
7898   def CheckPrereq(self):
7899     """Check prerequisites.
7900
7901     """
7902     if self.op.mode == constants.INSTANCE_IMPORT:
7903       export_info = self._ReadExportInfo()
7904       self._ReadExportParams(export_info)
7905
7906     if (not self.cfg.GetVGName() and
7907         self.op.disk_template not in constants.DTS_NOT_LVM):
7908       raise errors.OpPrereqError("Cluster does not support lvm-based"
7909                                  " instances", errors.ECODE_STATE)
7910
7911     if self.op.hypervisor is None:
7912       self.op.hypervisor = self.cfg.GetHypervisorType()
7913
7914     cluster = self.cfg.GetClusterInfo()
7915     enabled_hvs = cluster.enabled_hypervisors
7916     if self.op.hypervisor not in enabled_hvs:
7917       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7918                                  " cluster (%s)" % (self.op.hypervisor,
7919                                   ",".join(enabled_hvs)),
7920                                  errors.ECODE_STATE)
7921
7922     # check hypervisor parameter syntax (locally)
7923     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7924     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7925                                       self.op.hvparams)
7926     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7927     hv_type.CheckParameterSyntax(filled_hvp)
7928     self.hv_full = filled_hvp
7929     # check that we don't specify global parameters on an instance
7930     _CheckGlobalHvParams(self.op.hvparams)
7931
7932     # fill and remember the beparams dict
7933     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7934     self.be_full = cluster.SimpleFillBE(self.op.beparams)
7935
7936     # build os parameters
7937     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7938
7939     # now that hvp/bep are in final format, let's reset to defaults,
7940     # if told to do so
7941     if self.op.identify_defaults:
7942       self._RevertToDefaults(cluster)
7943
7944     # NIC buildup
7945     self.nics = []
7946     for idx, nic in enumerate(self.op.nics):
7947       nic_mode_req = nic.get(constants.INIC_MODE, None)
7948       nic_mode = nic_mode_req
7949       if nic_mode is None:
7950         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7951
7952       # in routed mode, for the first nic, the default ip is 'auto'
7953       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7954         default_ip_mode = constants.VALUE_AUTO
7955       else:
7956         default_ip_mode = constants.VALUE_NONE
7957
7958       # ip validity checks
7959       ip = nic.get(constants.INIC_IP, default_ip_mode)
7960       if ip is None or ip.lower() == constants.VALUE_NONE:
7961         nic_ip = None
7962       elif ip.lower() == constants.VALUE_AUTO:
7963         if not self.op.name_check:
7964           raise errors.OpPrereqError("IP address set to auto but name checks"
7965                                      " have been skipped",
7966                                      errors.ECODE_INVAL)
7967         nic_ip = self.hostname1.ip
7968       else:
7969         if not netutils.IPAddress.IsValid(ip):
7970           raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7971                                      errors.ECODE_INVAL)
7972         nic_ip = ip
7973
7974       # TODO: check the ip address for uniqueness
7975       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7976         raise errors.OpPrereqError("Routed nic mode requires an ip address",
7977                                    errors.ECODE_INVAL)
7978
7979       # MAC address verification
7980       mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
7981       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7982         mac = utils.NormalizeAndValidateMac(mac)
7983
7984         try:
7985           self.cfg.ReserveMAC(mac, self.proc.GetECId())
7986         except errors.ReservationError:
7987           raise errors.OpPrereqError("MAC address %s already in use"
7988                                      " in cluster" % mac,
7989                                      errors.ECODE_NOTUNIQUE)
7990
7991       #  Build nic parameters
7992       link = nic.get(constants.INIC_LINK, None)
7993       nicparams = {}
7994       if nic_mode_req:
7995         nicparams[constants.NIC_MODE] = nic_mode_req
7996       if link:
7997         nicparams[constants.NIC_LINK] = link
7998
7999       check_params = cluster.SimpleFillNIC(nicparams)
8000       objects.NIC.CheckParameterSyntax(check_params)
8001       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8002
8003     # disk checks/pre-build
8004     default_vg = self.cfg.GetVGName()
8005     self.disks = []
8006     for disk in self.op.disks:
8007       mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8008       if mode not in constants.DISK_ACCESS_SET:
8009         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8010                                    mode, errors.ECODE_INVAL)
8011       size = disk.get(constants.IDISK_SIZE, None)
8012       if size is None:
8013         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8014       try:
8015         size = int(size)
8016       except (TypeError, ValueError):
8017         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8018                                    errors.ECODE_INVAL)
8019
8020       data_vg = disk.get(constants.IDISK_VG, default_vg)
8021       new_disk = {
8022         constants.IDISK_SIZE: size,
8023         constants.IDISK_MODE: mode,
8024         constants.IDISK_VG: data_vg,
8025         constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8026         }
8027       if constants.IDISK_ADOPT in disk:
8028         new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8029       self.disks.append(new_disk)
8030
8031     if self.op.mode == constants.INSTANCE_IMPORT:
8032
8033       # Check that the new instance doesn't have less disks than the export
8034       instance_disks = len(self.disks)
8035       export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
8036       if instance_disks < export_disks:
8037         raise errors.OpPrereqError("Not enough disks to import."
8038                                    " (instance: %d, export: %d)" %
8039                                    (instance_disks, export_disks),
8040                                    errors.ECODE_INVAL)
8041
8042       disk_images = []
8043       for idx in range(export_disks):
8044         option = 'disk%d_dump' % idx
8045         if export_info.has_option(constants.INISECT_INS, option):
8046           # FIXME: are the old os-es, disk sizes, etc. useful?
8047           export_name = export_info.get(constants.INISECT_INS, option)
8048           image = utils.PathJoin(self.op.src_path, export_name)
8049           disk_images.append(image)
8050         else:
8051           disk_images.append(False)
8052
8053       self.src_images = disk_images
8054
8055       old_name = export_info.get(constants.INISECT_INS, 'name')
8056       try:
8057         exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
8058       except (TypeError, ValueError), err:
8059         raise errors.OpPrereqError("Invalid export file, nic_count is not"
8060                                    " an integer: %s" % str(err),
8061                                    errors.ECODE_STATE)
8062       if self.op.instance_name == old_name:
8063         for idx, nic in enumerate(self.nics):
8064           if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8065             nic_mac_ini = 'nic%d_mac' % idx
8066             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8067
8068     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8069
8070     # ip ping checks (we use the same ip that was resolved in ExpandNames)
8071     if self.op.ip_check:
8072       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8073         raise errors.OpPrereqError("IP %s of instance %s already in use" %
8074                                    (self.check_ip, self.op.instance_name),
8075                                    errors.ECODE_NOTUNIQUE)
8076
8077     #### mac address generation
8078     # By generating here the mac address both the allocator and the hooks get
8079     # the real final mac address rather than the 'auto' or 'generate' value.
8080     # There is a race condition between the generation and the instance object
8081     # creation, which means that we know the mac is valid now, but we're not
8082     # sure it will be when we actually add the instance. If things go bad
8083     # adding the instance will abort because of a duplicate mac, and the
8084     # creation job will fail.
8085     for nic in self.nics:
8086       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8087         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8088
8089     #### allocator run
8090
8091     if self.op.iallocator is not None:
8092       self._RunAllocator()
8093
8094     #### node related checks
8095
8096     # check primary node
8097     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8098     assert self.pnode is not None, \
8099       "Cannot retrieve locked node %s" % self.op.pnode
8100     if pnode.offline:
8101       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8102                                  pnode.name, errors.ECODE_STATE)
8103     if pnode.drained:
8104       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8105                                  pnode.name, errors.ECODE_STATE)
8106     if not pnode.vm_capable:
8107       raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8108                                  " '%s'" % pnode.name, errors.ECODE_STATE)
8109
8110     self.secondaries = []
8111
8112     # mirror node verification
8113     if self.op.disk_template in constants.DTS_INT_MIRROR:
8114       if self.op.snode == pnode.name:
8115         raise errors.OpPrereqError("The secondary node cannot be the"
8116                                    " primary node", errors.ECODE_INVAL)
8117       _CheckNodeOnline(self, self.op.snode)
8118       _CheckNodeNotDrained(self, self.op.snode)
8119       _CheckNodeVmCapable(self, self.op.snode)
8120       self.secondaries.append(self.op.snode)
8121
8122     nodenames = [pnode.name] + self.secondaries
8123
8124     if not self.adopt_disks:
8125       # Check lv size requirements, if not adopting
8126       req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8127       _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8128
8129     elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8130       all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8131                                 disk[constants.IDISK_ADOPT])
8132                      for disk in self.disks])
8133       if len(all_lvs) != len(self.disks):
8134         raise errors.OpPrereqError("Duplicate volume names given for adoption",
8135                                    errors.ECODE_INVAL)
8136       for lv_name in all_lvs:
8137         try:
8138           # FIXME: lv_name here is "vg/lv" need to ensure that other calls
8139           # to ReserveLV uses the same syntax
8140           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8141         except errors.ReservationError:
8142           raise errors.OpPrereqError("LV named %s used by another instance" %
8143                                      lv_name, errors.ECODE_NOTUNIQUE)
8144
8145       vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8146       vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8147
8148       node_lvs = self.rpc.call_lv_list([pnode.name],
8149                                        vg_names.payload.keys())[pnode.name]
8150       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8151       node_lvs = node_lvs.payload
8152
8153       delta = all_lvs.difference(node_lvs.keys())
8154       if delta:
8155         raise errors.OpPrereqError("Missing logical volume(s): %s" %
8156                                    utils.CommaJoin(delta),
8157                                    errors.ECODE_INVAL)
8158       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8159       if online_lvs:
8160         raise errors.OpPrereqError("Online logical volumes found, cannot"
8161                                    " adopt: %s" % utils.CommaJoin(online_lvs),
8162                                    errors.ECODE_STATE)
8163       # update the size of disk based on what is found
8164       for dsk in self.disks:
8165         dsk[constants.IDISK_SIZE] = \
8166           int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8167                                         dsk[constants.IDISK_ADOPT])][0]))
8168
8169     elif self.op.disk_template == constants.DT_BLOCK:
8170       # Normalize and de-duplicate device paths
8171       all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8172                        for disk in self.disks])
8173       if len(all_disks) != len(self.disks):
8174         raise errors.OpPrereqError("Duplicate disk names given for adoption",
8175                                    errors.ECODE_INVAL)
8176       baddisks = [d for d in all_disks
8177                   if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8178       if baddisks:
8179         raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8180                                    " cannot be adopted" %
8181                                    (", ".join(baddisks),
8182                                     constants.ADOPTABLE_BLOCKDEV_ROOT),
8183                                    errors.ECODE_INVAL)
8184
8185       node_disks = self.rpc.call_bdev_sizes([pnode.name],
8186                                             list(all_disks))[pnode.name]
8187       node_disks.Raise("Cannot get block device information from node %s" %
8188                        pnode.name)
8189       node_disks = node_disks.payload
8190       delta = all_disks.difference(node_disks.keys())
8191       if delta:
8192         raise errors.OpPrereqError("Missing block device(s): %s" %
8193                                    utils.CommaJoin(delta),
8194                                    errors.ECODE_INVAL)
8195       for dsk in self.disks:
8196         dsk[constants.IDISK_SIZE] = \
8197           int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8198
8199     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8200
8201     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8202     # check OS parameters (remotely)
8203     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8204
8205     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8206
8207     # memory check on primary node
8208     if self.op.start:
8209       _CheckNodeFreeMemory(self, self.pnode.name,
8210                            "creating instance %s" % self.op.instance_name,
8211                            self.be_full[constants.BE_MEMORY],
8212                            self.op.hypervisor)
8213
8214     self.dry_run_result = list(nodenames)
8215
  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    The method proceeds in this order: allocate a network port (if the
    hypervisor is listed in C{constants.HTS_REQ_PORT}), compute the file
    storage directory, generate the disk objects, create the disks (or
    rename adopted LVs), add the instance to the configuration, release
    node locks, optionally wipe the disks and wait for their sync, run
    the OS create/import scripts and, if C{self.op.start} is set, start
    the instance.

    @param feedback_fn: callable taking a message string; used to report
        progress
    @rtype: list
    @return: a list built from C{iobj.all_nodes} of the new instance
    @raise errors.OpExecError: if the disks are considered broken after
        wiping/syncing (the instance is removed again in that case)
    @raise errors.ProgrammerError: if C{self.op.mode} is not one of the
        modes handled below

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    # a network port is only allocated for hypervisors listed in
    # constants.HTS_REQ_PORT
    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
      # this is needed because os.path.join does not accept None arguments
      if self.op.file_storage_dir is None:
        string_file_storage_dir = ""
      else:
        string_file_storage_dir = self.op.file_storage_dir

      # build the full file storage dir path
      if self.op.disk_template == constants.DT_SHARED_FILE:
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
      else:
        get_fsd_fn = self.cfg.GetFileStorageDir

      file_storage_dir = utils.PathJoin(get_fsd_fn(),
                                        string_file_storage_dir, instance)
    else:
      file_storage_dir = ""

    # NOTE(review): the literal 0 passed below is presumably the base index
    # used for numbering the new disks -- confirm against
    # _GenerateDiskTemplate
    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0,
                                  feedback_fn)

    # the instance object is created with admin_up=False; it is only flipped
    # to True at the end of this method, and only if self.op.start is set
    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )

    if self.adopt_disks:
      # when adopting, nothing is created; only the plain (LVM) template
      # needs work here, other templates fall through without any action
      if self.op.disk_template == constants.DT_PLAIN:
        # rename LVs to the newly-generated names; we need to construct
        # 'fake' LV disks with the old data, plus the new unique_id
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
        rename_to = []
        for t_dsk, a_dsk in zip (tmp_disks, self.disks):
          # remember the generated name, then point the temporary disk at
          # the existing (to be adopted) volume name
          rename_to.append(t_dsk.logical_id)
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
          self.cfg.SetDiskID(t_dsk, pnode_name)
        result = self.rpc.call_blockdev_rename(pnode_name,
                                               zip(tmp_disks, rename_to))
        result.Raise("Failed to rename adoped LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          # the DRBD minors are released whether or not the removal above
          # succeeded, and the failure is then propagated to the caller
          self.cfg.ReleaseDRBDMinors(instance)
          raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]

    if self.op.mode == constants.INSTANCE_IMPORT:
      # Release unused nodes
      # (the source node stays locked; it is passed to TransferInstanceData
      # further below)
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
    else:
      # Release all nodes
      _ReleaseLocks(self, locking.LEVEL_NODE)

    disk_abort = False
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
      feedback_fn("* wiping instance disks...")
      try:
        _WipeDisks(self, iobj)
      except errors.OpExecError, err:
        # a wipe failure is logged and then handled through the same
        # disk_abort rollback path as a failed sync
        logging.exception("Wiping disks failed")
        self.LogWarning("Wiping instance disks failed (%s)", err)
        disk_abort = True

    if disk_abort:
      # Something is already wrong with the disks, don't do anything else
      pass
    elif self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      disk_abort = False

    if disk_abort:
      # roll back: drop the disks and the configuration entry; note that
      # the error below is also what gets raised after a failed wipe
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    # OS scripts are only run for non-diskless instances whose disks were
    # created here (i.e. not adopted)
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
                                                 self.op.debug_level)
          result.Raise("Could not add os for instance %s"
                       " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")

        transfers = []

        # self.src_images holds one entry per exported disk; false entries
        # mark disks without a dump and are skipped
        for idx, image in enumerate(self.src_images):
          if not image:
            continue

          # FIXME: pass debug option from opcode to backend
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                             constants.IEIO_FILE, (image, ),
                                             constants.IEIO_SCRIPT,
                                             (iobj.disks[idx], idx),
                                             None)
          transfers.append(dt)

        import_result = \
          masterd.instance.TransferInstanceData(self, feedback_fn,
                                                self.op.src_node, pnode_name,
                                                self.pnode.secondary_ip,
                                                iobj, transfers)
        # a partially failed import only produces a warning, not an error
        if not compat.all(import_result):
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
        feedback_fn("* preparing remote import...")
        # The source cluster will stop the instance before attempting to make a
        # connection. In some cases stopping an instance can take a long time,
        # hence the shutdown timeout is added to the connection timeout.
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
                           self.op.source_shutdown_timeout)
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

        assert iobj.primary_node == self.pnode.name
        disk_results = \
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
                                        self.source_x509_ca,
                                        self._cds, timeouts)
        if not compat.all(disk_results):
          # TODO: Should the instance still be started, even if some disks
          # failed to import (valid for local imports, too)?
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

        # Run rename script on newly imported instance
        assert iobj.name == instance
        feedback_fn("Running rename script for %s" % instance)
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                   self.source_instance_name,
                                                   self.op.debug_level)
        # a failing rename script is reported but does not abort the creation
        if result.fail_msg:
          self.LogWarning("Failed to run rename script for %s on node"
                          " %s: %s" % (instance, pnode_name, result.fail_msg))

      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      # the instance is recorded as up in the configuration before the node
      # is asked to start it
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)
8420
8421
class LUInstanceConsole(NoHooksLU):
  """Provide console access information for an instance.

  This LU does not open the console itself; after verifying that the
  instance is reported by its primary node, it returns the console
  description computed by L{_GetInstanceConsole}.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand the instance name and lock the instance.

    """
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    Fetches the (already locked) instance from the configuration and
    hands its primary node to L{_CheckNodeOnline}.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = inst
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, inst.primary_node)

  def Exec(self, feedback_fn):
    """Return the console information of a running instance.

    @param feedback_fn: feedback callable (not used by this LU)
    @return: the value returned by L{_GetInstanceConsole}
    @raise errors.OpExecError: if the primary node does not list the
        instance among its running instances

    """
    inst = self.instance
    pnode = inst.primary_node

    running = self.rpc.call_instance_list([pnode], [inst.hypervisor])[pnode]
    running.Raise("Can't get node information from %s" % pnode)

    if inst.name in running.payload:
      logging.debug("Connecting to console of %s on %s", inst.name, pnode)
      return _GetInstanceConsole(self.cfg.GetClusterInfo(), inst)

    # not running: tell apart "should be up" from "administratively down"
    if inst.admin_up:
      state = constants.INSTST_ERRORDOWN
    else:
      state = constants.INSTST_ADMINDOWN

    raise errors.OpExecError("Instance %s is not running (state %s)" %
                             (inst.name, state))
8468
8469
def _GetInstanceConsole(cluster, instance):
  """Compute the console information of an instance.

  The hypervisor implementation matching C{instance.hypervisor} is asked
  for the console object, which is sanity-checked and then returned in
  its dictionary form.

  @type cluster: L{objects.Cluster}
  @param cluster: cluster object used to fill in parameter defaults
  @type instance: L{objects.Instance}
  @param instance: the instance whose console is requested
  @rtype: dict
  @return: the result of the console object's C{ToDict} method

  """
  hv_impl = hypervisor.GetHypervisor(instance.hypervisor)

  # The filled hypervisor/backend parameters are handed over as separate
  # arguments instead of being written into the instance object, so that
  # cluster defaults never end up saved in the instance itself.
  cons = hv_impl.GetInstanceConsole(instance,
                                    cluster.FillHV(instance),
                                    cluster.FillBE(instance))

  assert cons.instance == instance.name
  assert cons.Validate()

  return cons.ToDict()
8489
8490
class LUInstanceReplaceDisks(LogicalUnit):
  """Logical unit replacing the disks of an instance.

  Argument checking, lock declaration and hooks are handled here; the
  replacement itself is delegated to a L{TLReplaceDisks} tasklet.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Validate the mode/new node/iallocator combination of the opcode.

    """
    opc = self.op
    TLReplaceDisks.CheckArguments(opc.mode, opc.remote_node, opc.iallocator)

  def ExpandNames(self):
    """Lock the instance, prepare node-level locks and build the tasklet.

    """
    self._ExpandAndLockInstance()

    node_level = locking.LEVEL_NODE
    group_level = locking.LEVEL_NODEGROUP

    assert node_level not in self.needed_locks
    assert group_level not in self.needed_locks

    assert not (self.op.iallocator is not None and
                self.op.remote_node is not None), \
      "Conflicting options"

    if self.op.remote_node is None:
      # node locks are computed later, in DeclareLocks
      self.needed_locks[node_level] = []
      self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

      if self.op.iallocator is not None:
        # iallocator will select a new node in the same group
        self.needed_locks[group_level] = []
    else:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[node_level] = [self.op.remote_node]
      self.recalculate_locks[node_level] = constants.LOCKS_APPEND

    self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
                                   self.op.iallocator, self.op.remote_node,
                                   self.op.disks, False, self.op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    """Fill in the node group and node locks for the given level.

    @param level: the locking level being declared

    """
    if level == locking.LEVEL_NODEGROUP:
      assert self.op.remote_node is None
      assert self.op.iallocator is not None
      assert not self.needed_locks[locking.LEVEL_NODEGROUP]

      # groups are locked in shared mode
      self.share_locks[locking.LEVEL_NODEGROUP] = 1
      instance_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
      self.needed_locks[locking.LEVEL_NODEGROUP] = instance_groups

    elif level == locking.LEVEL_NODE:
      if self.op.iallocator is None:
        self._LockInstancesNodes()
      else:
        assert self.op.remote_node is None
        assert not self.needed_locks[locking.LEVEL_NODE]

        # Lock member nodes of all locked groups
        member_nodes = []
        for group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP):
          member_nodes.extend(self.cfg.GetNodeGroup(group_uuid).members)
        self.needed_locks[locking.LEVEL_NODE] = member_nodes

  def BuildHooksEnv(self):
    """Build hooks env.

    The environment contains the replacement mode, the new and old
    secondary node and the generic per-instance hook variables.

    """
    inst = self.replacer.instance

    env = {}
    env["MODE"] = self.op.mode
    env["NEW_SECONDARY"] = self.op.remote_node
    env["OLD_SECONDARY"] = inst.secondary_nodes[0]
    env.update(_BuildInstanceHookEnvByObject(self, inst))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: a pair made of the same node list twice: the master node,
        the instance's primary node and, if given, the new secondary

    """
    inst = self.replacer.instance
    hook_nodes = [self.cfg.GetMasterNode(), inst.primary_node]

    new_secondary = self.op.remote_node
    if new_secondary is not None:
      hook_nodes.append(new_secondary)

    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    If node group locks are held, verifies that the instance's node
    groups still match them before running the generic checks.

    """
    assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
            self.op.iallocator is None)

    locked_groups = self.glm.list_owned(locking.LEVEL_NODEGROUP)
    if locked_groups:
      current_groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
      if locked_groups != current_groups:
        details = (self.op.instance_name,
                   utils.CommaJoin(current_groups),
                   utils.CommaJoin(locked_groups))
        raise errors.OpExecError("Node groups used by instance '%s' changed"
                                 " since lock was acquired, current list is %r,"
                                 " used to be '%s'" % details)

    return LogicalUnit.CheckPrereq(self)
8604
8605
8606 class TLReplaceDisks(Tasklet):
8607   """Replaces disks for an instance.
8608
8609   Note: Locking is not within the scope of this class.
8610
8611   """
8612   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
8613                disks, delay_iallocator, early_release):
8614     """Initializes this class.
8615
8616     """
8617     Tasklet.__init__(self, lu)
8618
8619     # Parameters
8620     self.instance_name = instance_name
8621     self.mode = mode
8622     self.iallocator_name = iallocator_name
8623     self.remote_node = remote_node
8624     self.disks = disks
8625     self.delay_iallocator = delay_iallocator
8626     self.early_release = early_release
8627
8628     # Runtime data
8629     self.instance = None
8630     self.new_node = None
8631     self.target_node = None
8632     self.other_node = None
8633     self.remote_node_info = None
8634     self.node_secondary_ip = None
8635
8636   @staticmethod
8637   def CheckArguments(mode, remote_node, iallocator):
8638     """Helper function for users of this class.
8639
8640     """
8641     # check for valid parameter combination
8642     if mode == constants.REPLACE_DISK_CHG:
8643       if remote_node is None and iallocator is None:
8644         raise errors.OpPrereqError("When changing the secondary either an"
8645                                    " iallocator script must be used or the"
8646                                    " new node given", errors.ECODE_INVAL)
8647
8648       if remote_node is not None and iallocator is not None:
8649         raise errors.OpPrereqError("Give either the iallocator or the new"
8650                                    " secondary, not both", errors.ECODE_INVAL)
8651
8652     elif remote_node is not None or iallocator is not None:
8653       # Not replacing the secondary
8654       raise errors.OpPrereqError("The iallocator and new node options can"
8655                                  " only be used when changing the"
8656                                  " secondary node", errors.ECODE_INVAL)
8657
8658   @staticmethod
8659   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8660     """Compute a new secondary node using an IAllocator.
8661
8662     """
8663     ial = IAllocator(lu.cfg, lu.rpc,
8664                      mode=constants.IALLOCATOR_MODE_RELOC,
8665                      name=instance_name,
8666                      relocate_from=relocate_from)
8667
8668     ial.Run(iallocator_name)
8669
8670     if not ial.success:
8671       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
8672                                  " %s" % (iallocator_name, ial.info),
8673                                  errors.ECODE_NORES)
8674
8675     if len(ial.result) != ial.required_nodes:
8676       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8677                                  " of nodes (%s), required %s" %
8678                                  (iallocator_name,
8679                                   len(ial.result), ial.required_nodes),
8680                                  errors.ECODE_FAULT)
8681
8682     remote_node_name = ial.result[0]
8683
8684     lu.LogInfo("Selected new secondary for instance '%s': %s",
8685                instance_name, remote_node_name)
8686
8687     return remote_node_name
8688
8689   def _FindFaultyDisks(self, node_name):
8690     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
8691                                     node_name, True)
8692
8693   def _CheckDisksActivated(self, instance):
8694     """Checks if the instance disks are activated.
8695
8696     @param instance: The instance to check disks
8697     @return: True if they are activated, False otherwise
8698
8699     """
8700     nodes = instance.all_nodes
8701
8702     for idx, dev in enumerate(instance.disks):
8703       for node in nodes:
8704         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
8705         self.cfg.SetDiskID(dev, node)
8706
8707         result = self.rpc.call_blockdev_find(node, dev)
8708
8709         if result.offline:
8710           continue
8711         elif result.fail_msg or not result.payload:
8712           return False
8713
8714     return True
8715
8716   def CheckPrereq(self):
8717     """Check prerequisites.
8718
8719     This checks that the instance is in the cluster.
8720
8721     """
8722     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
8723     assert instance is not None, \
8724       "Cannot retrieve locked instance %s" % self.instance_name
8725
8726     if instance.disk_template != constants.DT_DRBD8:
8727       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
8728                                  " instances", errors.ECODE_INVAL)
8729
8730     if len(instance.secondary_nodes) != 1:
8731       raise errors.OpPrereqError("The instance has a strange layout,"
8732                                  " expected one secondary but found %d" %
8733                                  len(instance.secondary_nodes),
8734                                  errors.ECODE_FAULT)
8735
8736     if not self.delay_iallocator:
8737       self._CheckPrereq2()
8738
  def _CheckPrereq2(self):
    """Check prerequisites, second part.

    This function should always be part of CheckPrereq. It was separated and is
    now called from Exec because during node evacuation iallocator was only
    called with an unmodified cluster model, not taking planned changes into
    account.

    Besides validating the request, this method decides which nodes take
    part in the operation (C{target_node}, C{other_node} and, when changing
    the secondary, C{new_node}), fills in C{disks} with the indices to
    replace, releases the node locks that are not needed and records the
    secondary IP address of every node involved in C{node_secondary_ip}.

    @raise errors.OpPrereqError: if the chosen node or the disk selection is
        not acceptable for the requested mode
    @raise errors.ProgrammerError: if the replace mode is unknown

    """
    instance = self.instance
    secondary_node = instance.secondary_nodes[0]

    # The new secondary is either given explicitly or computed by the
    # iallocator
    if self.iallocator_name is None:
      remote_node = self.remote_node
    else:
      remote_node = self._RunAllocator(self.lu, self.iallocator_name,
                                       instance.name, instance.secondary_nodes)

    if remote_node is None:
      self.remote_node_info = None
    else:
      assert remote_node in self.lu.glm.list_owned(locking.LEVEL_NODE), \
             "Remote node '%s' is not locked" % remote_node

      self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
      assert self.remote_node_info is not None, \
        "Cannot retrieve locked node %s" % remote_node

    # The new node must differ from both current nodes of the instance
    if remote_node == self.instance.primary_node:
      raise errors.OpPrereqError("The specified node is the primary node of"
                                 " the instance", errors.ECODE_INVAL)

    if remote_node == secondary_node:
      raise errors.OpPrereqError("The specified node is already the"
                                 " secondary node of the instance",
                                 errors.ECODE_INVAL)

    # An explicit disk list is rejected in the automatic and
    # change-secondary modes
    if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
                                    constants.REPLACE_DISK_CHG):
      raise errors.OpPrereqError("Cannot specify disks to be replaced",
                                 errors.ECODE_INVAL)

    if self.mode == constants.REPLACE_DISK_AUTO:
      # Automatic mode: find out which side (if any) has faulty disks
      if not self._CheckDisksActivated(instance):
        raise errors.OpPrereqError("Please run activate-disks on instance %s"
                                   " first" % self.instance_name,
                                   errors.ECODE_STATE)
      faulty_primary = self._FindFaultyDisks(instance.primary_node)
      faulty_secondary = self._FindFaultyDisks(secondary_node)

      if faulty_primary and faulty_secondary:
        raise errors.OpPrereqError("Instance %s has faulty disks on more than"
                                   " one node and can not be repaired"
                                   " automatically" % self.instance_name,
                                   errors.ECODE_STATE)

      if faulty_primary:
        self.disks = faulty_primary
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]
      elif faulty_secondary:
        self.disks = faulty_secondary
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]
      else:
        # Nothing is faulty; target_node and other_node are left untouched
        # here (NOTE(review): presumably initialised elsewhere - confirm)
        self.disks = []
        check_nodes = []

    else:
      # Non-automatic modes
      if self.mode == constants.REPLACE_DISK_PRI:
        self.target_node = instance.primary_node
        self.other_node = secondary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_SEC:
        self.target_node = secondary_node
        self.other_node = instance.primary_node
        check_nodes = [self.target_node, self.other_node]

      elif self.mode == constants.REPLACE_DISK_CHG:
        self.new_node = remote_node
        self.other_node = instance.primary_node
        self.target_node = secondary_node
        # Only the new node and the primary are added to check_nodes, the
        # old secondary is not
        check_nodes = [self.new_node, self.other_node]

        _CheckNodeNotDrained(self.lu, remote_node)
        _CheckNodeVmCapable(self.lu, remote_node)

        old_node_info = self.cfg.GetNodeInfo(secondary_node)
        assert old_node_info is not None
        if old_node_info.offline and not self.early_release:
          # doesn't make sense to delay the release
          self.early_release = True
          self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
                          " early-release mode", secondary_node)

      else:
        raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
                                     self.mode)

      # If not specified all disks should be replaced
      if not self.disks:
        self.disks = range(len(self.instance.disks))

    for node in check_nodes:
      _CheckNodeOnline(self.lu, node)

    # Every node taking part in the operation; unset (None) roles are
    # filtered out
    touched_nodes = frozenset(node_name for node_name in [self.new_node,
                                                          self.other_node,
                                                          self.target_node]
                              if node_name is not None)

    # Release unneeded node locks
    _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)

    # Release any owned node group
    if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
      _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)

    # Check whether disks are valid
    # (the return value is ignored; NOTE(review): presumably FindDisk raises
    # for an invalid index - confirm)
    for disk_idx in self.disks:
      instance.FindDisk(disk_idx)

    # Get secondary node IP addresses
    self.node_secondary_ip = \
      dict((node_name, self.cfg.GetNodeInfo(node_name).secondary_ip)
           for node_name in touched_nodes)
8869
8870   def Exec(self, feedback_fn):
8871     """Execute disk replacement.
8872
8873     This dispatches the disk replacement to the appropriate handler.
8874
8875     """
8876     if self.delay_iallocator:
8877       self._CheckPrereq2()
8878
8879     if __debug__:
8880       # Verify owned locks before starting operation
8881       owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
8882       assert set(owned_locks) == set(self.node_secondary_ip), \
8883           ("Incorrect node locks, owning %s, expected %s" %
8884            (owned_locks, self.node_secondary_ip.keys()))
8885
8886       owned_locks = self.lu.glm.list_owned(locking.LEVEL_INSTANCE)
8887       assert list(owned_locks) == [self.instance_name], \
8888           "Instance '%s' not locked" % self.instance_name
8889
8890       assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
8891           "Should not own any node group lock at this point"
8892
8893     if not self.disks:
8894       feedback_fn("No disks need replacement")
8895       return
8896
8897     feedback_fn("Replacing disk(s) %s for %s" %
8898                 (utils.CommaJoin(self.disks), self.instance.name))
8899
8900     activate_disks = (not self.instance.admin_up)
8901
8902     # Activate the instance disks if we're replacing them on a down instance
8903     if activate_disks:
8904       _StartInstanceDisks(self.lu, self.instance, True)
8905
8906     try:
8907       # Should we replace the secondary node?
8908       if self.new_node is not None:
8909         fn = self._ExecDrbd8Secondary
8910       else:
8911         fn = self._ExecDrbd8DiskOnly
8912
8913       result = fn(feedback_fn)
8914     finally:
8915       # Deactivate the instance disks if we're replacing them on a
8916       # down instance
8917       if activate_disks:
8918         _SafeShutdownInstanceDisks(self.lu, self.instance)
8919
8920     if __debug__:
8921       # Verify owned locks
8922       owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
8923       nodes = frozenset(self.node_secondary_ip)
8924       assert ((self.early_release and not owned_locks) or
8925               (not self.early_release and not (set(owned_locks) - nodes))), \
8926         ("Not owning the correct locks, early_release=%s, owned=%r,"
8927          " nodes=%r" % (self.early_release, owned_locks, nodes))
8928
8929     return result
8930
8931   def _CheckVolumeGroup(self, nodes):
8932     self.lu.LogInfo("Checking volume groups")
8933
8934     vgname = self.cfg.GetVGName()
8935
8936     # Make sure volume group exists on all involved nodes
8937     results = self.rpc.call_vg_list(nodes)
8938     if not results:
8939       raise errors.OpExecError("Can't list volume groups on the nodes")
8940
8941     for node in nodes:
8942       res = results[node]
8943       res.Raise("Error checking node %s" % node)
8944       if vgname not in res.payload:
8945         raise errors.OpExecError("Volume group '%s' not found on node %s" %
8946                                  (vgname, node))
8947
8948   def _CheckDisksExistence(self, nodes):
8949     # Check disk existence
8950     for idx, dev in enumerate(self.instance.disks):
8951       if idx not in self.disks:
8952         continue
8953
8954       for node in nodes:
8955         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8956         self.cfg.SetDiskID(dev, node)
8957
8958         result = self.rpc.call_blockdev_find(node, dev)
8959
8960         msg = result.fail_msg
8961         if msg or not result.payload:
8962           if not msg:
8963             msg = "disk not found"
8964           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8965                                    (idx, node, msg))
8966
8967   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8968     for idx, dev in enumerate(self.instance.disks):
8969       if idx not in self.disks:
8970         continue
8971
8972       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8973                       (idx, node_name))
8974
8975       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8976                                    ldisk=ldisk):
8977         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8978                                  " replace disks for instance %s" %
8979                                  (node_name, self.instance.name))
8980
8981   def _CreateNewStorage(self, node_name):
8982     iv_names = {}
8983
8984     for idx, dev in enumerate(self.instance.disks):
8985       if idx not in self.disks:
8986         continue
8987
8988       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8989
8990       self.cfg.SetDiskID(dev, node_name)
8991
8992       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8993       names = _GenerateUniqueNames(self.lu, lv_names)
8994
8995       vg_data = dev.children[0].logical_id[0]
8996       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8997                              logical_id=(vg_data, names[0]))
8998       vg_meta = dev.children[1].logical_id[0]
8999       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9000                              logical_id=(vg_meta, names[1]))
9001
9002       new_lvs = [lv_data, lv_meta]
9003       old_lvs = dev.children
9004       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9005
9006       # we pass force_create=True to force the LVM creation
9007       for new_lv in new_lvs:
9008         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9009                         _GetInstanceInfoText(self.instance), False)
9010
9011     return iv_names
9012
9013   def _CheckDevices(self, node_name, iv_names):
9014     for name, (dev, _, _) in iv_names.iteritems():
9015       self.cfg.SetDiskID(dev, node_name)
9016
9017       result = self.rpc.call_blockdev_find(node_name, dev)
9018
9019       msg = result.fail_msg
9020       if msg or not result.payload:
9021         if not msg:
9022           msg = "disk not found"
9023         raise errors.OpExecError("Can't find DRBD device %s: %s" %
9024                                  (name, msg))
9025
9026       if result.payload.is_degraded:
9027         raise errors.OpExecError("DRBD device %s is degraded!" % name)
9028
9029   def _RemoveOldStorage(self, node_name, iv_names):
9030     for name, (_, old_lvs, _) in iv_names.iteritems():
9031       self.lu.LogInfo("Remove logical volumes for %s" % name)
9032
9033       for lv in old_lvs:
9034         self.cfg.SetDiskID(lv, node_name)
9035
9036         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9037         if msg:
9038           self.lu.LogWarning("Can't remove old LV: %s" % msg,
9039                              hint="remove unused LVs manually")
9040
9041   def _ExecDrbd8DiskOnly(self, feedback_fn):
9042     """Replace a disk on the primary or secondary for DRBD 8.
9043
9044     The algorithm for replace is quite complicated:
9045
9046       1. for each disk to be replaced:
9047
9048         1. create new LVs on the target node with unique names
9049         1. detach old LVs from the drbd device
9050         1. rename old LVs to name_replaced.<time_t>
9051         1. rename new LVs to old LVs
9052         1. attach the new LVs (with the old names now) to the drbd device
9053
9054       1. wait for sync across all devices
9055
9056       1. for each modified disk:
9057
9058         1. remove old LVs (which have the name name_replaces.<time_t>)
9059
9060     Failures are not very well handled.
9061
9062     """
9063     steps_total = 6
9064
9065     # Step: check device activation
9066     self.lu.LogStep(1, steps_total, "Check device existence")
9067     self._CheckDisksExistence([self.other_node, self.target_node])
9068     self._CheckVolumeGroup([self.target_node, self.other_node])
9069
9070     # Step: check other node consistency
9071     self.lu.LogStep(2, steps_total, "Check peer consistency")
9072     self._CheckDisksConsistency(self.other_node,
9073                                 self.other_node == self.instance.primary_node,
9074                                 False)
9075
9076     # Step: create new storage
9077     self.lu.LogStep(3, steps_total, "Allocate new storage")
9078     iv_names = self._CreateNewStorage(self.target_node)
9079
9080     # Step: for each lv, detach+rename*2+attach
9081     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9082     for dev, old_lvs, new_lvs in iv_names.itervalues():
9083       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9084
9085       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9086                                                      old_lvs)
9087       result.Raise("Can't detach drbd from local storage on node"
9088                    " %s for device %s" % (self.target_node, dev.iv_name))
9089       #dev.children = []
9090       #cfg.Update(instance)
9091
9092       # ok, we created the new LVs, so now we know we have the needed
9093       # storage; as such, we proceed on the target node to rename
9094       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9095       # using the assumption that logical_id == physical_id (which in
9096       # turn is the unique_id on that node)
9097
9098       # FIXME(iustin): use a better name for the replaced LVs
9099       temp_suffix = int(time.time())
9100       ren_fn = lambda d, suff: (d.physical_id[0],
9101                                 d.physical_id[1] + "_replaced-%s" % suff)
9102
9103       # Build the rename list based on what LVs exist on the node
9104       rename_old_to_new = []
9105       for to_ren in old_lvs:
9106         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9107         if not result.fail_msg and result.payload:
9108           # device exists
9109           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9110
9111       self.lu.LogInfo("Renaming the old LVs on the target node")
9112       result = self.rpc.call_blockdev_rename(self.target_node,
9113                                              rename_old_to_new)
9114       result.Raise("Can't rename old LVs on node %s" % self.target_node)
9115
9116       # Now we rename the new LVs to the old LVs
9117       self.lu.LogInfo("Renaming the new LVs on the target node")
9118       rename_new_to_old = [(new, old.physical_id)
9119                            for old, new in zip(old_lvs, new_lvs)]
9120       result = self.rpc.call_blockdev_rename(self.target_node,
9121                                              rename_new_to_old)
9122       result.Raise("Can't rename new LVs on node %s" % self.target_node)
9123
9124       for old, new in zip(old_lvs, new_lvs):
9125         new.logical_id = old.logical_id
9126         self.cfg.SetDiskID(new, self.target_node)
9127
9128       for disk in old_lvs:
9129         disk.logical_id = ren_fn(disk, temp_suffix)
9130         self.cfg.SetDiskID(disk, self.target_node)
9131
9132       # Now that the new lvs have the old name, we can add them to the device
9133       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9134       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9135                                                   new_lvs)
9136       msg = result.fail_msg
9137       if msg:
9138         for new_lv in new_lvs:
9139           msg2 = self.rpc.call_blockdev_remove(self.target_node,
9140                                                new_lv).fail_msg
9141           if msg2:
9142             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9143                                hint=("cleanup manually the unused logical"
9144                                      "volumes"))
9145         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9146
9147       dev.children = new_lvs
9148
9149       self.cfg.Update(self.instance, feedback_fn)
9150
9151     cstep = 5
9152     if self.early_release:
9153       self.lu.LogStep(cstep, steps_total, "Removing old storage")
9154       cstep += 1
9155       self._RemoveOldStorage(self.target_node, iv_names)
9156       # WARNING: we release both node locks here, do not do other RPCs
9157       # than WaitForSync to the primary node
9158       _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9159                     names=[self.target_node, self.other_node])
9160
9161     # Wait for sync
9162     # This can fail as the old devices are degraded and _WaitForSync
9163     # does a combined result over all disks, so we don't check its return value
9164     self.lu.LogStep(cstep, steps_total, "Sync devices")
9165     cstep += 1
9166     _WaitForSync(self.lu, self.instance)
9167
9168     # Check all devices manually
9169     self._CheckDevices(self.instance.primary_node, iv_names)
9170
9171     # Step: remove old storage
9172     if not self.early_release:
9173       self.lu.LogStep(cstep, steps_total, "Removing old storage")
9174       cstep += 1
9175       self._RemoveOldStorage(self.target_node, iv_names)
9176
9177   def _ExecDrbd8Secondary(self, feedback_fn):
9178     """Replace the secondary node for DRBD 8.
9179
9180     The algorithm for replace is quite complicated:
9181       - for all disks of the instance:
9182         - create new LVs on the new node with same names
9183         - shutdown the drbd device on the old secondary
9184         - disconnect the drbd network on the primary
9185         - create the drbd device on the new secondary
9186         - network attach the drbd on the primary, using an artifice:
9187           the drbd code for Attach() will connect to the network if it
9188           finds a device which is connected to the good local disks but
9189           not network enabled
9190       - wait for sync across all devices
9191       - remove all disks from the old secondary
9192
9193     Failures are not very well handled.
9194
9195     """
9196     steps_total = 6
9197
9198     # Step: check device activation
9199     self.lu.LogStep(1, steps_total, "Check device existence")
9200     self._CheckDisksExistence([self.instance.primary_node])
9201     self._CheckVolumeGroup([self.instance.primary_node])
9202
9203     # Step: check other node consistency
9204     self.lu.LogStep(2, steps_total, "Check peer consistency")
9205     self._CheckDisksConsistency(self.instance.primary_node, True, True)
9206
9207     # Step: create new storage
9208     self.lu.LogStep(3, steps_total, "Allocate new storage")
9209     for idx, dev in enumerate(self.instance.disks):
9210       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9211                       (self.new_node, idx))
9212       # we pass force_create=True to force LVM creation
9213       for new_lv in dev.children:
9214         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9215                         _GetInstanceInfoText(self.instance), False)
9216
9217     # Step 4: dbrd minors and drbd setups changes
9218     # after this, we must manually remove the drbd minors on both the
9219     # error and the success paths
9220     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9221     minors = self.cfg.AllocateDRBDMinor([self.new_node
9222                                          for dev in self.instance.disks],
9223                                         self.instance.name)
9224     logging.debug("Allocated minors %r", minors)
9225
9226     iv_names = {}
9227     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9228       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9229                       (self.new_node, idx))
9230       # create new devices on new_node; note that we create two IDs:
9231       # one without port, so the drbd will be activated without
9232       # networking information on the new node at this stage, and one
9233       # with network, for the latter activation in step 4
9234       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9235       if self.instance.primary_node == o_node1:
9236         p_minor = o_minor1
9237       else:
9238         assert self.instance.primary_node == o_node2, "Three-node instance?"
9239         p_minor = o_minor2
9240
9241       new_alone_id = (self.instance.primary_node, self.new_node, None,
9242                       p_minor, new_minor, o_secret)
9243       new_net_id = (self.instance.primary_node, self.new_node, o_port,
9244                     p_minor, new_minor, o_secret)
9245
9246       iv_names[idx] = (dev, dev.children, new_net_id)
9247       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9248                     new_net_id)
9249       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9250                               logical_id=new_alone_id,
9251                               children=dev.children,
9252                               size=dev.size)
9253       try:
9254         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9255                               _GetInstanceInfoText(self.instance), False)
9256       except errors.GenericError:
9257         self.cfg.ReleaseDRBDMinors(self.instance.name)
9258         raise
9259
9260     # We have new devices, shutdown the drbd on the old secondary
9261     for idx, dev in enumerate(self.instance.disks):
9262       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9263       self.cfg.SetDiskID(dev, self.target_node)
9264       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9265       if msg:
9266         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9267                            "node: %s" % (idx, msg),
9268                            hint=("Please cleanup this device manually as"
9269                                  " soon as possible"))
9270
9271     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9272     result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
9273                                                self.node_secondary_ip,
9274                                                self.instance.disks)\
9275                                               [self.instance.primary_node]
9276
9277     msg = result.fail_msg
9278     if msg:
9279       # detaches didn't succeed (unlikely)
9280       self.cfg.ReleaseDRBDMinors(self.instance.name)
9281       raise errors.OpExecError("Can't detach the disks from the network on"
9282                                " old node: %s" % (msg,))
9283
9284     # if we managed to detach at least one, we update all the disks of
9285     # the instance to point to the new secondary
9286     self.lu.LogInfo("Updating instance configuration")
9287     for dev, _, new_logical_id in iv_names.itervalues():
9288       dev.logical_id = new_logical_id
9289       self.cfg.SetDiskID(dev, self.instance.primary_node)
9290
9291     self.cfg.Update(self.instance, feedback_fn)
9292
9293     # and now perform the drbd attach
9294     self.lu.LogInfo("Attaching primary drbds to new secondary"
9295                     " (standalone => connected)")
9296     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9297                                             self.new_node],
9298                                            self.node_secondary_ip,
9299                                            self.instance.disks,
9300                                            self.instance.name,
9301                                            False)
9302     for to_node, to_result in result.items():
9303       msg = to_result.fail_msg
9304       if msg:
9305         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9306                            to_node, msg,
9307                            hint=("please do a gnt-instance info to see the"
9308                                  " status of disks"))
9309     cstep = 5
9310     if self.early_release:
9311       self.lu.LogStep(cstep, steps_total, "Removing old storage")
9312       cstep += 1
9313       self._RemoveOldStorage(self.target_node, iv_names)
9314       # WARNING: we release all node locks here, do not do other RPCs
9315       # than WaitForSync to the primary node
9316       _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9317                     names=[self.instance.primary_node,
9318                            self.target_node,
9319                            self.new_node])
9320
9321     # Wait for sync
9322     # This can fail as the old devices are degraded and _WaitForSync
9323     # does a combined result over all disks, so we don't check its return value
9324     self.lu.LogStep(cstep, steps_total, "Sync devices")
9325     cstep += 1
9326     _WaitForSync(self.lu, self.instance)
9327
9328     # Check all devices manually
9329     self._CheckDevices(self.instance.primary_node, iv_names)
9330
9331     # Step: remove old storage
9332     if not self.early_release:
9333       self.lu.LogStep(cstep, steps_total, "Removing old storage")
9334       self._RemoveOldStorage(self.target_node, iv_names)
9335
9336
9337 class LURepairNodeStorage(NoHooksLU):
9338   """Repairs the volume group on a node.
9339
9340   """
9341   REQ_BGL = False
9342
9343   def CheckArguments(self):
9344     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9345
9346     storage_type = self.op.storage_type
9347
9348     if (constants.SO_FIX_CONSISTENCY not in
9349         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
9350       raise errors.OpPrereqError("Storage units of type '%s' can not be"
9351                                  " repaired" % storage_type,
9352                                  errors.ECODE_INVAL)
9353
9354   def ExpandNames(self):
9355     self.needed_locks = {
9356       locking.LEVEL_NODE: [self.op.node_name],
9357       }
9358
9359   def _CheckFaultyDisks(self, instance, node_name):
9360     """Ensure faulty disks abort the opcode or at least warn."""
9361     try:
9362       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
9363                                   node_name, True):
9364         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
9365                                    " node '%s'" % (instance.name, node_name),
9366                                    errors.ECODE_STATE)
9367     except errors.OpPrereqError, err:
9368       if self.op.ignore_consistency:
9369         self.proc.LogWarning(str(err.args[0]))
9370       else:
9371         raise
9372
9373   def CheckPrereq(self):
9374     """Check prerequisites.
9375
9376     """
9377     # Check whether any instance on this node has faulty disks
9378     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
9379       if not inst.admin_up:
9380         continue
9381       check_nodes = set(inst.all_nodes)
9382       check_nodes.discard(self.op.node_name)
9383       for inst_node_name in check_nodes:
9384         self._CheckFaultyDisks(inst, inst_node_name)
9385
9386   def Exec(self, feedback_fn):
9387     feedback_fn("Repairing storage unit '%s' on %s ..." %
9388                 (self.op.name, self.op.node_name))
9389
9390     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
9391     result = self.rpc.call_storage_execute(self.op.node_name,
9392                                            self.op.storage_type, st_args,
9393                                            self.op.name,
9394                                            constants.SO_FIX_CONSISTENCY)
9395     result.Raise("Failed to repair storage unit '%s' on %s" %
9396                  (self.op.name, self.op.node_name))
9397
9398
class LUNodeEvacStrategy(NoHooksLU):
  """Logical unit computing where to move instances off a set of nodes.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Validate the iallocator/remote node combination.

    """
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    """Expand the node names and declare the node locks needed.

    Without a remote node all nodes are locked, otherwise only the
    evacuated nodes and the remote node.

    """
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
    self.needed_locks = {}

    if self.op.remote_node is not None:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      node_locks = self.op.nodes + [self.op.remote_node]
    else:
      node_locks = locking.ALL_SET

    self.needed_locks[locking.LEVEL_NODE] = node_locks

  def Exec(self, feedback_fn):
    """Compute the evacuation strategy.

    @param feedback_fn: unused
    @return: with a remote node, a list of [instance name, remote node]
        pairs for the secondary instances of the evacuated nodes;
        otherwise the result of the iallocator run
    @raise errors.OpPrereqError: if the remote node is the primary node of
        one of the instances
    @raise errors.OpExecError: if the iallocator run was not successful

    """
    if self.op.remote_node is None:
      # No fixed destination, let the iallocator work out a solution
      ial = IAllocator(self.cfg, self.rpc,
                       mode=constants.IALLOCATOR_MODE_MEVAC,
                       evac_nodes=self.op.nodes)
      ial.Run(self.op.iallocator, validate=True)
      if not ial.success:
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
                                 errors.ECODE_NORES)
      return ial.result

    # Collect the secondary instances of all the evacuated nodes first
    secondaries = []
    for node in self.op.nodes:
      secondaries.extend(_GetNodeSecondaryInstances(self.cfg, node))

    plan = []
    for inst in secondaries:
      if inst.primary_node == self.op.remote_node:
        raise errors.OpPrereqError("Node %s is the primary node of"
                                   " instance %s, cannot use it as"
                                   " secondary" %
                                   (self.op.remote_node, inst.name),
                                   errors.ECODE_INVAL)
      plan.append([inst.name, self.op.remote_node])

    return plan
9441
9442
class LUInstanceGrowDisk(LogicalUnit):
  """Grow a disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are filled in by DeclareLocks."""
    self._ExpandAndLockInstance()
    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Compute the node locks once the node level is reached."""
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master, the primary and all the secondaries.

    """
    hook_env = {
      "DISK": self.op.disk,
      "AMOUNT": self.op.amount,
      }
    # values coming from the instance take precedence on key clashes
    instance_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hook_env.update(instance_env)
    return hook_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The same list (master node followed by all instance nodes) is used for
    both elements of the returned tuple.

    """
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that all its nodes are
    online, that its disk template is growable and, for templates that are
    not file-based, that the nodes have enough free disk space.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node_names = list(inst.all_nodes)
    for node_name in node_names:
      _CheckNodeOnline(self, node_name)

    self.instance = inst

    template = inst.disk_template
    if template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing", errors.ECODE_INVAL)

    self.disk = inst.FindDisk(self.op.disk)

    if template in (constants.DT_FILE, constants.DT_SHARED_FILE):
      # TODO: check the free disk space for file, when that feature will be
      # supported
      return

    needed_space = self.disk.ComputeGrowth(self.op.amount)
    _CheckNodesFreeDiskPerVG(self, node_names, needed_space)

  def Exec(self, feedback_fn):
    """Execute disk grow.

    """
    inst = self.instance
    target_disk = self.disk

    (activated, _) = _AssembleInstanceDisks(self, inst, disks=[target_disk])
    if not activated:
      raise errors.OpExecError("Cannot activate block device to grow")

    for node_name in inst.all_nodes:
      self.cfg.SetDiskID(target_disk, node_name)
      grow_result = self.rpc.call_blockdev_grow(node_name, target_disk,
                                                self.op.amount)
      grow_result.Raise("Grow request failed to node %s" % node_name)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    # the new size is written to the configuration only after all nodes
    # accepted the grow request
    target_disk.RecordGrow(self.op.amount)
    self.cfg.Update(inst, feedback_fn)

    if self.op.wait_for_sync:
      if not _WaitForSync(self, inst, disks=[target_disk]):
        self.proc.LogWarning("Disk sync-ing has not returned a good"
                             " status; please check the instance")
      if not inst.admin_up:
        _SafeShutdownInstanceDisks(self, inst, disks=[target_disk])
    elif not inst.admin_up:
      self.proc.LogWarning("Not shutting down the disk even if the instance is"
                           " not supposed to be running because no wait for"
                           " sync mode was requested")
9544
9545
class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  Returns, per instance, a dictionary with configuration data and (unless
  only static data was requested) runtime state and block device status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Compute the wanted instance names and the locks to acquire.

    Non-static queries always use locking; if the caller did not ask for
    it, a warning is logged and locking is switched on.

    """
    self.needed_locks = {}

    # Use locking if requested or when non-static information is wanted
    if not (self.op.static or self.op.use_locking):
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if self.op.instances or not self.op.use_locking:
      # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
    else:
      # Will use acquired locks
      self.wanted_names = None

    if self.op.use_locking:
      # all locks are acquired in shared mode
      self.share_locks = dict.fromkeys(locking.LEVELS, 1)

      if self.wanted_names is None:
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

      # node locks are computed in DeclareLocks from the locked instances
      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Compute the node locks when locking is in use."""
    if self.op.use_locking and level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    if self.wanted_names is None:
      # no explicit list was given, so use whatever instance locks we own
      assert self.op.use_locking, "Locking was not used"
      self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)

    self.wanted_instances = [self.cfg.GetInstanceInfo(name)
                             for name in self.wanted_names]

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    @return: None for static queries, when no node is given, when the node
        is offline or when the RPC payload is None; otherwise a tuple of
        (dev_path, major, minor, sync_percent, estimated_time, is_degraded,
        ldisk_status)

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    The status is computed on the primary node and on the secondary node,
    and recursively for all children of the device.

    @return: a dictionary describing the device, including the primary
        ("pstatus") and secondary ("sstatus") status and the list of
        children descriptions

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
                      for child in dev.children]
    else:
      dev_children = []

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data

    @return: a dictionary mapping each instance name to a dictionary with
        its data

    """
    result = {}

    cluster = self.cfg.GetClusterInfo()

    for instance in self.wanted_instances:
      if not self.op.static:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        # a non-empty payload containing "state" is reported as running
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          remote_state = "down"
      else:
        # static queries do not contact the node
        remote_state = None
      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disks = [self._ComputeDiskStatus(instance, None, device)
               for device in instance.disks]

      result[instance.name] = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result
9705
9706
9707 class LUInstanceSetParams(LogicalUnit):
9708   """Modifies an instances's parameters.
9709
9710   """
9711   HPATH = "instance-modify"
9712   HTYPE = constants.HTYPE_INSTANCE
9713   REQ_BGL = False
9714
9715   def CheckArguments(self):
9716     if not (self.op.nics or self.op.disks or self.op.disk_template or
9717             self.op.hvparams or self.op.beparams or self.op.os_name):
9718       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
9719
9720     if self.op.hvparams:
9721       _CheckGlobalHvParams(self.op.hvparams)
9722
9723     # Disk validation
9724     disk_addremove = 0
9725     for disk_op, disk_dict in self.op.disks:
9726       utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
9727       if disk_op == constants.DDM_REMOVE:
9728         disk_addremove += 1
9729         continue
9730       elif disk_op == constants.DDM_ADD:
9731         disk_addremove += 1
9732       else:
9733         if not isinstance(disk_op, int):
9734           raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
9735         if not isinstance(disk_dict, dict):
9736           msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
9737           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9738
9739       if disk_op == constants.DDM_ADD:
9740         mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
9741         if mode not in constants.DISK_ACCESS_SET:
9742           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9743                                      errors.ECODE_INVAL)
9744         size = disk_dict.get(constants.IDISK_SIZE, None)
9745         if size is None:
9746           raise errors.OpPrereqError("Required disk parameter size missing",
9747                                      errors.ECODE_INVAL)
9748         try:
9749           size = int(size)
9750         except (TypeError, ValueError), err:
9751           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
9752                                      str(err), errors.ECODE_INVAL)
9753         disk_dict[constants.IDISK_SIZE] = size
9754       else:
9755         # modification of disk
9756         if constants.IDISK_SIZE in disk_dict:
9757           raise errors.OpPrereqError("Disk size change not possible, use"
9758                                      " grow-disk", errors.ECODE_INVAL)
9759
9760     if disk_addremove > 1:
9761       raise errors.OpPrereqError("Only one disk add or remove operation"
9762                                  " supported at a time", errors.ECODE_INVAL)
9763
9764     if self.op.disks and self.op.disk_template is not None:
9765       raise errors.OpPrereqError("Disk template conversion and other disk"
9766                                  " changes not supported at the same time",
9767                                  errors.ECODE_INVAL)
9768
9769     if (self.op.disk_template and
9770         self.op.disk_template in constants.DTS_INT_MIRROR and
9771         self.op.remote_node is None):
9772       raise errors.OpPrereqError("Changing the disk template to a mirrored"
9773                                  " one requires specifying a secondary node",
9774                                  errors.ECODE_INVAL)
9775
9776     # NIC validation
9777     nic_addremove = 0
9778     for nic_op, nic_dict in self.op.nics:
9779       utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9780       if nic_op == constants.DDM_REMOVE:
9781         nic_addremove += 1
9782         continue
9783       elif nic_op == constants.DDM_ADD:
9784         nic_addremove += 1
9785       else:
9786         if not isinstance(nic_op, int):
9787           raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9788         if not isinstance(nic_dict, dict):
9789           msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9790           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9791
9792       # nic_dict should be a dict
9793       nic_ip = nic_dict.get(constants.INIC_IP, None)
9794       if nic_ip is not None:
9795         if nic_ip.lower() == constants.VALUE_NONE:
9796           nic_dict[constants.INIC_IP] = None
9797         else:
9798           if not netutils.IPAddress.IsValid(nic_ip):
9799             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9800                                        errors.ECODE_INVAL)
9801
9802       nic_bridge = nic_dict.get('bridge', None)
9803       nic_link = nic_dict.get(constants.INIC_LINK, None)
9804       if nic_bridge and nic_link:
9805         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9806                                    " at the same time", errors.ECODE_INVAL)
9807       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9808         nic_dict['bridge'] = None
9809       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9810         nic_dict[constants.INIC_LINK] = None
9811
9812       if nic_op == constants.DDM_ADD:
9813         nic_mac = nic_dict.get(constants.INIC_MAC, None)
9814         if nic_mac is None:
9815           nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
9816
9817       if constants.INIC_MAC in nic_dict:
9818         nic_mac = nic_dict[constants.INIC_MAC]
9819         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9820           nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9821
9822         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9823           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9824                                      " modifying an existing nic",
9825                                      errors.ECODE_INVAL)
9826
9827     if nic_addremove > 1:
9828       raise errors.OpPrereqError("Only one NIC add or remove operation"
9829                                  " supported at a time", errors.ECODE_INVAL)
9830
9831   def ExpandNames(self):
9832     self._ExpandAndLockInstance()
9833     self.needed_locks[locking.LEVEL_NODE] = []
9834     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9835
9836   def DeclareLocks(self, level):
9837     if level == locking.LEVEL_NODE:
9838       self._LockInstancesNodes()
9839       if self.op.disk_template and self.op.remote_node:
9840         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9841         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9842
9843   def BuildHooksEnv(self):
9844     """Build hooks env.
9845
9846     This runs on the master, primary and secondaries.
9847
9848     """
9849     args = dict()
9850     if constants.BE_MEMORY in self.be_new:
9851       args['memory'] = self.be_new[constants.BE_MEMORY]
9852     if constants.BE_VCPUS in self.be_new:
9853       args['vcpus'] = self.be_new[constants.BE_VCPUS]
9854     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9855     # information at all.
9856     if self.op.nics:
9857       args['nics'] = []
9858       nic_override = dict(self.op.nics)
9859       for idx, nic in enumerate(self.instance.nics):
9860         if idx in nic_override:
9861           this_nic_override = nic_override[idx]
9862         else:
9863           this_nic_override = {}
9864         if constants.INIC_IP in this_nic_override:
9865           ip = this_nic_override[constants.INIC_IP]
9866         else:
9867           ip = nic.ip
9868         if constants.INIC_MAC in this_nic_override:
9869           mac = this_nic_override[constants.INIC_MAC]
9870         else:
9871           mac = nic.mac
9872         if idx in self.nic_pnew:
9873           nicparams = self.nic_pnew[idx]
9874         else:
9875           nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9876         mode = nicparams[constants.NIC_MODE]
9877         link = nicparams[constants.NIC_LINK]
9878         args['nics'].append((ip, mac, mode, link))
9879       if constants.DDM_ADD in nic_override:
9880         ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
9881         mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
9882         nicparams = self.nic_pnew[constants.DDM_ADD]
9883         mode = nicparams[constants.NIC_MODE]
9884         link = nicparams[constants.NIC_LINK]
9885         args['nics'].append((ip, mac, mode, link))
9886       elif constants.DDM_REMOVE in nic_override:
9887         del args['nics'][-1]
9888
9889     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9890     if self.op.disk_template:
9891       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9892
9893     return env
9894
9895   def BuildHooksNodes(self):
9896     """Build hooks nodes.
9897
9898     """
9899     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9900     return (nl, nl)
9901
9902   def CheckPrereq(self):
9903     """Check prerequisites.
9904
9905     This only checks the instance list against the existing names.
9906
9907     """
9908     # checking the new params on the primary/secondary nodes
9909
9910     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9911     cluster = self.cluster = self.cfg.GetClusterInfo()
9912     assert self.instance is not None, \
9913       "Cannot retrieve locked instance %s" % self.op.instance_name
9914     pnode = instance.primary_node
9915     nodelist = list(instance.all_nodes)
9916
9917     # OS change
9918     if self.op.os_name and not self.op.force:
9919       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9920                       self.op.force_variant)
9921       instance_os = self.op.os_name
9922     else:
9923       instance_os = instance.os
9924
9925     if self.op.disk_template:
9926       if instance.disk_template == self.op.disk_template:
9927         raise errors.OpPrereqError("Instance already has disk template %s" %
9928                                    instance.disk_template, errors.ECODE_INVAL)
9929
9930       if (instance.disk_template,
9931           self.op.disk_template) not in self._DISK_CONVERSIONS:
9932         raise errors.OpPrereqError("Unsupported disk template conversion from"
9933                                    " %s to %s" % (instance.disk_template,
9934                                                   self.op.disk_template),
9935                                    errors.ECODE_INVAL)
9936       _CheckInstanceDown(self, instance, "cannot change disk template")
9937       if self.op.disk_template in constants.DTS_INT_MIRROR:
9938         if self.op.remote_node == pnode:
9939           raise errors.OpPrereqError("Given new secondary node %s is the same"
9940                                      " as the primary node of the instance" %
9941                                      self.op.remote_node, errors.ECODE_STATE)
9942         _CheckNodeOnline(self, self.op.remote_node)
9943         _CheckNodeNotDrained(self, self.op.remote_node)
9944         # FIXME: here we assume that the old instance type is DT_PLAIN
9945         assert instance.disk_template == constants.DT_PLAIN
9946         disks = [{constants.IDISK_SIZE: d.size,
9947                   constants.IDISK_VG: d.logical_id[0]}
9948                  for d in instance.disks]
9949         required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9950         _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9951
9952     # hvparams processing
9953     if self.op.hvparams:
9954       hv_type = instance.hypervisor
9955       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9956       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9957       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9958
9959       # local check
9960       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9961       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9962       self.hv_new = hv_new # the new actual values
9963       self.hv_inst = i_hvdict # the new dict (without defaults)
9964     else:
9965       self.hv_new = self.hv_inst = {}
9966
9967     # beparams processing
9968     if self.op.beparams:
9969       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9970                                    use_none=True)
9971       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9972       be_new = cluster.SimpleFillBE(i_bedict)
9973       self.be_new = be_new # the new actual values
9974       self.be_inst = i_bedict # the new dict (without defaults)
9975     else:
9976       self.be_new = self.be_inst = {}
9977
9978     # osparams processing
9979     if self.op.osparams:
9980       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9981       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9982       self.os_inst = i_osdict # the new dict (without defaults)
9983     else:
9984       self.os_inst = {}
9985
9986     self.warn = []
9987
9988     if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9989       mem_check_list = [pnode]
9990       if be_new[constants.BE_AUTO_BALANCE]:
9991         # either we changed auto_balance to yes or it was from before
9992         mem_check_list.extend(instance.secondary_nodes)
9993       instance_info = self.rpc.call_instance_info(pnode, instance.name,
9994                                                   instance.hypervisor)
9995       nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9996                                          instance.hypervisor)
9997       pninfo = nodeinfo[pnode]
9998       msg = pninfo.fail_msg
9999       if msg:
10000         # Assume the primary node is unreachable and go ahead
10001         self.warn.append("Can't get info from primary node %s: %s" %
10002                          (pnode,  msg))
10003       elif not isinstance(pninfo.payload.get('memory_free', None), int):
10004         self.warn.append("Node data from primary node %s doesn't contain"
10005                          " free memory information" % pnode)
10006       elif instance_info.fail_msg:
10007         self.warn.append("Can't get instance runtime information: %s" %
10008                         instance_info.fail_msg)
10009       else:
10010         if instance_info.payload:
10011           current_mem = int(instance_info.payload['memory'])
10012         else:
10013           # Assume instance not running
10014           # (there is a slight race condition here, but it's not very probable,
10015           # and we have no other way to check)
10016           current_mem = 0
10017         miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10018                     pninfo.payload['memory_free'])
10019         if miss_mem > 0:
10020           raise errors.OpPrereqError("This change will prevent the instance"
10021                                      " from starting, due to %d MB of memory"
10022                                      " missing on its primary node" % miss_mem,
10023                                      errors.ECODE_NORES)
10024
10025       if be_new[constants.BE_AUTO_BALANCE]:
10026         for node, nres in nodeinfo.items():
10027           if node not in instance.secondary_nodes:
10028             continue
10029           msg = nres.fail_msg
10030           if msg:
10031             self.warn.append("Can't get info from secondary node %s: %s" %
10032                              (node, msg))
10033           elif not isinstance(nres.payload.get('memory_free', None), int):
10034             self.warn.append("Secondary node %s didn't return free"
10035                              " memory information" % node)
10036           elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
10037             self.warn.append("Not enough memory to failover instance to"
10038                              " secondary node %s" % node)
10039
10040     # NIC processing
10041     self.nic_pnew = {}
10042     self.nic_pinst = {}
10043     for nic_op, nic_dict in self.op.nics:
10044       if nic_op == constants.DDM_REMOVE:
10045         if not instance.nics:
10046           raise errors.OpPrereqError("Instance has no NICs, cannot remove",
10047                                      errors.ECODE_INVAL)
10048         continue
10049       if nic_op != constants.DDM_ADD:
10050         # an existing nic
10051         if not instance.nics:
10052           raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10053                                      " no NICs" % nic_op,
10054                                      errors.ECODE_INVAL)
10055         if nic_op < 0 or nic_op >= len(instance.nics):
10056           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
10057                                      " are 0 to %d" %
10058                                      (nic_op, len(instance.nics) - 1),
10059                                      errors.ECODE_INVAL)
10060         old_nic_params = instance.nics[nic_op].nicparams
10061         old_nic_ip = instance.nics[nic_op].ip
10062       else:
10063         old_nic_params = {}
10064         old_nic_ip = None
10065
10066       update_params_dict = dict([(key, nic_dict[key])
10067                                  for key in constants.NICS_PARAMETERS
10068                                  if key in nic_dict])
10069
10070       if 'bridge' in nic_dict:
10071         update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
10072
10073       new_nic_params = _GetUpdatedParams(old_nic_params,
10074                                          update_params_dict)
10075       utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
10076       new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
10077       objects.NIC.CheckParameterSyntax(new_filled_nic_params)
10078       self.nic_pinst[nic_op] = new_nic_params
10079       self.nic_pnew[nic_op] = new_filled_nic_params
10080       new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
10081
10082       if new_nic_mode == constants.NIC_MODE_BRIDGED:
10083         nic_bridge = new_filled_nic_params[constants.NIC_LINK]
10084         msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
10085         if msg:
10086           msg = "Error checking bridges on node %s: %s" % (pnode, msg)
10087           if self.op.force:
10088             self.warn.append(msg)
10089           else:
10090             raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
10091       if new_nic_mode == constants.NIC_MODE_ROUTED:
10092         if constants.INIC_IP in nic_dict:
10093           nic_ip = nic_dict[constants.INIC_IP]
10094         else:
10095           nic_ip = old_nic_ip
10096         if nic_ip is None:
10097           raise errors.OpPrereqError('Cannot set the nic ip to None'
10098                                      ' on a routed nic', errors.ECODE_INVAL)
10099       if constants.INIC_MAC in nic_dict:
10100         nic_mac = nic_dict[constants.INIC_MAC]
10101         if nic_mac is None:
10102           raise errors.OpPrereqError('Cannot set the nic mac to None',
10103                                      errors.ECODE_INVAL)
10104         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10105           # otherwise generate the mac
10106           nic_dict[constants.INIC_MAC] = \
10107             self.cfg.GenerateMAC(self.proc.GetECId())
10108         else:
10109           # or validate/reserve the current one
10110           try:
10111             self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
10112           except errors.ReservationError:
10113             raise errors.OpPrereqError("MAC address %s already in use"
10114                                        " in cluster" % nic_mac,
10115                                        errors.ECODE_NOTUNIQUE)
10116
10117     # DISK processing
10118     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
10119       raise errors.OpPrereqError("Disk operations not supported for"
10120                                  " diskless instances",
10121                                  errors.ECODE_INVAL)
10122     for disk_op, _ in self.op.disks:
10123       if disk_op == constants.DDM_REMOVE:
10124         if len(instance.disks) == 1:
10125           raise errors.OpPrereqError("Cannot remove the last disk of"
10126                                      " an instance", errors.ECODE_INVAL)
10127         _CheckInstanceDown(self, instance, "cannot remove disks")
10128
10129       if (disk_op == constants.DDM_ADD and
10130           len(instance.disks) >= constants.MAX_DISKS):
10131         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
10132                                    " add more" % constants.MAX_DISKS,
10133                                    errors.ECODE_STATE)
10134       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
10135         # an existing disk
10136         if disk_op < 0 or disk_op >= len(instance.disks):
10137           raise errors.OpPrereqError("Invalid disk index %s, valid values"
10138                                      " are 0 to %d" %
10139                                      (disk_op, len(instance.disks)),
10140                                      errors.ECODE_INVAL)
10141
10142     return
10143
10144   def _ConvertPlainToDrbd(self, feedback_fn):
10145     """Converts an instance from plain to drbd.
10146
10147     """
10148     feedback_fn("Converting template to drbd")
10149     instance = self.instance
10150     pnode = instance.primary_node
10151     snode = self.op.remote_node
10152
10153     # create a fake disk info for _GenerateDiskTemplate
10154     disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
10155                   constants.IDISK_VG: d.logical_id[0]}
10156                  for d in instance.disks]
10157     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
10158                                       instance.name, pnode, [snode],
10159                                       disk_info, None, None, 0, feedback_fn)
10160     info = _GetInstanceInfoText(instance)
10161     feedback_fn("Creating aditional volumes...")
10162     # first, create the missing data and meta devices
10163     for disk in new_disks:
10164       # unfortunately this is... not too nice
10165       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
10166                             info, True)
10167       for child in disk.children:
10168         _CreateSingleBlockDev(self, snode, instance, child, info, True)
10169     # at this stage, all new LVs have been created, we can rename the
10170     # old ones
10171     feedback_fn("Renaming original volumes...")
10172     rename_list = [(o, n.children[0].logical_id)
10173                    for (o, n) in zip(instance.disks, new_disks)]
10174     result = self.rpc.call_blockdev_rename(pnode, rename_list)
10175     result.Raise("Failed to rename original LVs")
10176
10177     feedback_fn("Initializing DRBD devices...")
10178     # all child devices are in place, we can now create the DRBD devices
10179     for disk in new_disks:
10180       for node in [pnode, snode]:
10181         f_create = node == pnode
10182         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
10183
10184     # at this point, the instance has been modified
10185     instance.disk_template = constants.DT_DRBD8
10186     instance.disks = new_disks
10187     self.cfg.Update(instance, feedback_fn)
10188
10189     # disks are created, waiting for sync
10190     disk_abort = not _WaitForSync(self, instance)
10191     if disk_abort:
10192       raise errors.OpExecError("There are some degraded disks for"
10193                                " this instance, please cleanup manually")
10194
10195   def _ConvertDrbdToPlain(self, feedback_fn):
10196     """Converts an instance from drbd to plain.
10197
10198     """
10199     instance = self.instance
10200     assert len(instance.secondary_nodes) == 1
10201     pnode = instance.primary_node
10202     snode = instance.secondary_nodes[0]
10203     feedback_fn("Converting template to plain")
10204
10205     old_disks = instance.disks
10206     new_disks = [d.children[0] for d in old_disks]
10207
10208     # copy over size and mode
10209     for parent, child in zip(old_disks, new_disks):
10210       child.size = parent.size
10211       child.mode = parent.mode
10212
10213     # update instance structure
10214     instance.disks = new_disks
10215     instance.disk_template = constants.DT_PLAIN
10216     self.cfg.Update(instance, feedback_fn)
10217
10218     feedback_fn("Removing volumes on the secondary node...")
10219     for disk in old_disks:
10220       self.cfg.SetDiskID(disk, snode)
10221       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
10222       if msg:
10223         self.LogWarning("Could not remove block device %s on node %s,"
10224                         " continuing anyway: %s", disk.iv_name, snode, msg)
10225
10226     feedback_fn("Removing unneeded volumes on the primary node...")
10227     for idx, disk in enumerate(old_disks):
10228       meta = disk.children[1]
10229       self.cfg.SetDiskID(meta, pnode)
10230       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
10231       if msg:
10232         self.LogWarning("Could not remove metadata for disk %d on node %s,"
10233                         " continuing anyway: %s", idx, pnode, msg)
10234
10235   def Exec(self, feedback_fn):
10236     """Modifies an instance.
10237
10238     All parameters take effect only at the next restart of the instance.
10239
10240     """
10241     # Process here the warnings from CheckPrereq, as we don't have a
10242     # feedback_fn there.
10243     for warn in self.warn:
10244       feedback_fn("WARNING: %s" % warn)
10245
10246     result = []
10247     instance = self.instance
10248     # disk changes
10249     for disk_op, disk_dict in self.op.disks:
10250       if disk_op == constants.DDM_REMOVE:
10251         # remove the last disk
10252         device = instance.disks.pop()
10253         device_idx = len(instance.disks)
10254         for node, disk in device.ComputeNodeTree(instance.primary_node):
10255           self.cfg.SetDiskID(disk, node)
10256           msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
10257           if msg:
10258             self.LogWarning("Could not remove disk/%d on node %s: %s,"
10259                             " continuing anyway", device_idx, node, msg)
10260         result.append(("disk/%d" % device_idx, "remove"))
10261       elif disk_op == constants.DDM_ADD:
10262         # add a new disk
10263         if instance.disk_template in (constants.DT_FILE,
10264                                         constants.DT_SHARED_FILE):
10265           file_driver, file_path = instance.disks[0].logical_id
10266           file_path = os.path.dirname(file_path)
10267         else:
10268           file_driver = file_path = None
10269         disk_idx_base = len(instance.disks)
10270         new_disk = _GenerateDiskTemplate(self,
10271                                          instance.disk_template,
10272                                          instance.name, instance.primary_node,
10273                                          instance.secondary_nodes,
10274                                          [disk_dict],
10275                                          file_path,
10276                                          file_driver,
10277                                          disk_idx_base, feedback_fn)[0]
10278         instance.disks.append(new_disk)
10279         info = _GetInstanceInfoText(instance)
10280
10281         logging.info("Creating volume %s for instance %s",
10282                      new_disk.iv_name, instance.name)
10283         # Note: this needs to be kept in sync with _CreateDisks
10284         #HARDCODE
10285         for node in instance.all_nodes:
10286           f_create = node == instance.primary_node
10287           try:
10288             _CreateBlockDev(self, node, instance, new_disk,
10289                             f_create, info, f_create)
10290           except errors.OpExecError, err:
10291             self.LogWarning("Failed to create volume %s (%s) on"
10292                             " node %s: %s",
10293                             new_disk.iv_name, new_disk, node, err)
10294         result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
10295                        (new_disk.size, new_disk.mode)))
10296       else:
10297         # change a given disk
10298         instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
10299         result.append(("disk.mode/%d" % disk_op,
10300                        disk_dict[constants.IDISK_MODE]))
10301
10302     if self.op.disk_template:
10303       r_shut = _ShutdownInstanceDisks(self, instance)
10304       if not r_shut:
10305         raise errors.OpExecError("Cannot shutdown instance disks, unable to"
10306                                  " proceed with disk template conversion")
10307       mode = (instance.disk_template, self.op.disk_template)
10308       try:
10309         self._DISK_CONVERSIONS[mode](self, feedback_fn)
10310       except:
10311         self.cfg.ReleaseDRBDMinors(instance.name)
10312         raise
10313       result.append(("disk_template", self.op.disk_template))
10314
10315     # NIC changes
10316     for nic_op, nic_dict in self.op.nics:
10317       if nic_op == constants.DDM_REMOVE:
10318         # remove the last nic
10319         del instance.nics[-1]
10320         result.append(("nic.%d" % len(instance.nics), "remove"))
10321       elif nic_op == constants.DDM_ADD:
10322         # mac and bridge should be set, by now
10323         mac = nic_dict[constants.INIC_MAC]
10324         ip = nic_dict.get(constants.INIC_IP, None)
10325         nicparams = self.nic_pinst[constants.DDM_ADD]
10326         new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
10327         instance.nics.append(new_nic)
10328         result.append(("nic.%d" % (len(instance.nics) - 1),
10329                        "add:mac=%s,ip=%s,mode=%s,link=%s" %
10330                        (new_nic.mac, new_nic.ip,
10331                         self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
10332                         self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
10333                        )))
10334       else:
10335         for key in (constants.INIC_MAC, constants.INIC_IP):
10336           if key in nic_dict:
10337             setattr(instance.nics[nic_op], key, nic_dict[key])
10338         if nic_op in self.nic_pinst:
10339           instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
10340         for key, val in nic_dict.iteritems():
10341           result.append(("nic.%s/%d" % (key, nic_op), val))
10342
10343     # hvparams changes
10344     if self.op.hvparams:
10345       instance.hvparams = self.hv_inst
10346       for key, val in self.op.hvparams.iteritems():
10347         result.append(("hv/%s" % key, val))
10348
10349     # beparams changes
10350     if self.op.beparams:
10351       instance.beparams = self.be_inst
10352       for key, val in self.op.beparams.iteritems():
10353         result.append(("be/%s" % key, val))
10354
10355     # OS change
10356     if self.op.os_name:
10357       instance.os = self.op.os_name
10358
10359     # osparams changes
10360     if self.op.osparams:
10361       instance.osparams = self.os_inst
10362       for key, val in self.op.osparams.iteritems():
10363         result.append(("os/%s" % key, val))
10364
10365     self.cfg.Update(instance, feedback_fn)
10366
10367     return result
10368
10369   _DISK_CONVERSIONS = {
10370     (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
10371     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
10372     }
10373
10374
class LUBackupQuery(NoHooksLU):
  """Query the exports list

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declares shared locks on the nodes which will be queried.

    Without an explicit node list in the opcode all nodes are locked.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.nodes:
      wanted = _GetWantedNodes(self, self.op.nodes)
    else:
      wanted = locking.ALL_SET
    self.needed_locks[locking.LEVEL_NODE] = wanted

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    @rtype: dict
    @return: a dictionary with the structure node->(export-list)
        where export-list is a list of the instances exported on
        that node; for a node whose query failed the value is
        C{False} instead

    """
    self.nodes = self.glm.list_owned(locking.LEVEL_NODE)
    answers = self.rpc.call_export_list(self.nodes)

    exports = {}
    for node in answers:
      answer = answers[node]
      if not answer.fail_msg:
        exports[node] = answer.payload
      else:
        exports[node] = False

    return exports
10409
10410
class LUBackupPrepare(NoHooksLU):
  """Prepares an instance for an export and returns useful information.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expands the instance name and locks the instance.

    """
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    Looks up the locked instance, verifies that its primary node is online
    and fetches the cluster domain secret for later use in Exec.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
          "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    @return: for remote exports a dictionary with the keys C{handshake},
        C{x509_key_name} and C{x509_ca}; C{None} for any other mode

    """
    pnode = self.instance.primary_node

    # only remote exports need any preparation
    if self.op.mode != constants.EXPORT_MODE_REMOTE:
      return None

    salt = utils.GenerateSecret(8)

    feedback_fn("Generating X509 certificate on %s" % pnode)
    create_result = self.rpc.call_x509_cert_create(pnode,
                                                   constants.RIE_CERT_VALIDITY)
    create_result.Raise("Can't create X509 key and certificate on %s" %
                        create_result.node)

    (key_name, cert_pem) = create_result.payload

    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           cert_pem)

    # key name and certificate are both authenticated with the cluster
    # domain secret, using the salt generated above
    handshake = masterd.instance.ComputeRemoteExportHandshake(self._cds)
    key_name_hmac = utils.Sha1Hmac(self._cds, key_name, salt=salt)
    signed_ca = utils.SignX509Certificate(cert, self._cds, salt)

    return {
      "handshake": handshake,
      "x509_key_name": (key_name, key_name_hmac, salt),
      "x509_ca": signed_ca,
      }
10460
10461
10462 class LUBackupExport(LogicalUnit):
10463   """Export an instance to an image in the cluster.
10464
10465   """
10466   HPATH = "instance-export"
10467   HTYPE = constants.HTYPE_INSTANCE
10468   REQ_BGL = False
10469
10470   def CheckArguments(self):
10471     """Check the arguments.
10472
10473     """
10474     self.x509_key_name = self.op.x509_key_name
10475     self.dest_x509_ca_pem = self.op.destination_x509_ca
10476
10477     if self.op.mode == constants.EXPORT_MODE_REMOTE:
10478       if not self.x509_key_name:
10479         raise errors.OpPrereqError("Missing X509 key name for encryption",
10480                                    errors.ECODE_INVAL)
10481
10482       if not self.dest_x509_ca_pem:
10483         raise errors.OpPrereqError("Missing destination X509 CA",
10484                                    errors.ECODE_INVAL)
10485
10486   def ExpandNames(self):
10487     self._ExpandAndLockInstance()
10488
10489     # Lock all nodes for local exports
10490     if self.op.mode == constants.EXPORT_MODE_LOCAL:
10491       # FIXME: lock only instance primary and destination node
10492       #
10493       # Sad but true, for now we have do lock all nodes, as we don't know where
10494       # the previous export might be, and in this LU we search for it and
10495       # remove it from its current node. In the future we could fix this by:
10496       #  - making a tasklet to search (share-lock all), then create the
10497       #    new one, then one to remove, after
10498       #  - removing the removal operation altogether
10499       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10500
10501   def DeclareLocks(self, level):
10502     """Last minute lock declaration."""
10503     # All nodes are locked anyway, so nothing to do here.
10504
10505   def BuildHooksEnv(self):
10506     """Build hooks env.
10507
10508     This will run on the master, primary node and target node.
10509
10510     """
10511     env = {
10512       "EXPORT_MODE": self.op.mode,
10513       "EXPORT_NODE": self.op.target_node,
10514       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
10515       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
10516       # TODO: Generic function for boolean env variables
10517       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
10518       }
10519
10520     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10521
10522     return env
10523
10524   def BuildHooksNodes(self):
10525     """Build hooks nodes.
10526
10527     """
10528     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
10529
10530     if self.op.mode == constants.EXPORT_MODE_LOCAL:
10531       nl.append(self.op.target_node)
10532
10533     return (nl, nl)
10534
10535   def CheckPrereq(self):
10536     """Check prerequisites.
10537
10538     This checks that the instance and node names are valid.
10539
10540     """
10541     instance_name = self.op.instance_name
10542
10543     self.instance = self.cfg.GetInstanceInfo(instance_name)
10544     assert self.instance is not None, \
10545           "Cannot retrieve locked instance %s" % self.op.instance_name
10546     _CheckNodeOnline(self, self.instance.primary_node)
10547
10548     if (self.op.remove_instance and self.instance.admin_up and
10549         not self.op.shutdown):
10550       raise errors.OpPrereqError("Can not remove instance without shutting it"
10551                                  " down before")
10552
10553     if self.op.mode == constants.EXPORT_MODE_LOCAL:
10554       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
10555       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
10556       assert self.dst_node is not None
10557
10558       _CheckNodeOnline(self, self.dst_node.name)
10559       _CheckNodeNotDrained(self, self.dst_node.name)
10560
10561       self._cds = None
10562       self.dest_disk_info = None
10563       self.dest_x509_ca = None
10564
10565     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10566       self.dst_node = None
10567
10568       if len(self.op.target_node) != len(self.instance.disks):
10569         raise errors.OpPrereqError(("Received destination information for %s"
10570                                     " disks, but instance %s has %s disks") %
10571                                    (len(self.op.target_node), instance_name,
10572                                     len(self.instance.disks)),
10573                                    errors.ECODE_INVAL)
10574
10575       cds = _GetClusterDomainSecret()
10576
10577       # Check X509 key name
10578       try:
10579         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
10580       except (TypeError, ValueError), err:
10581         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
10582
10583       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
10584         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
10585                                    errors.ECODE_INVAL)
10586
10587       # Load and verify CA
10588       try:
10589         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
10590       except OpenSSL.crypto.Error, err:
10591         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
10592                                    (err, ), errors.ECODE_INVAL)
10593
10594       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10595       if errcode is not None:
10596         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
10597                                    (msg, ), errors.ECODE_INVAL)
10598
10599       self.dest_x509_ca = cert
10600
10601       # Verify target information
10602       disk_info = []
10603       for idx, disk_data in enumerate(self.op.target_node):
10604         try:
10605           (host, port, magic) = \
10606             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
10607         except errors.GenericError, err:
10608           raise errors.OpPrereqError("Target info for disk %s: %s" %
10609                                      (idx, err), errors.ECODE_INVAL)
10610
10611         disk_info.append((host, port, magic))
10612
10613       assert len(disk_info) == len(self.op.target_node)
10614       self.dest_disk_info = disk_info
10615
10616     else:
10617       raise errors.ProgrammerError("Unhandled export mode %r" %
10618                                    self.op.mode)
10619
10620     # instance disk type verification
10621     # TODO: Implement export support for file-based disks
10622     for disk in self.instance.disks:
10623       if disk.dev_type == constants.LD_FILE:
10624         raise errors.OpPrereqError("Export not supported for instances with"
10625                                    " file-based disks", errors.ECODE_INVAL)
10626
10627   def _CleanupExports(self, feedback_fn):
10628     """Removes exports of current instance from all other nodes.
10629
10630     If an instance in a cluster with nodes A..D was exported to node C, its
10631     exports will be removed from the nodes A, B and D.
10632
10633     """
10634     assert self.op.mode != constants.EXPORT_MODE_REMOTE
10635
10636     nodelist = self.cfg.GetNodeList()
10637     nodelist.remove(self.dst_node.name)
10638
10639     # on one-node clusters nodelist will be empty after the removal
10640     # if we proceed the backup would be removed because OpBackupQuery
10641     # substitutes an empty list with the full cluster node list.
10642     iname = self.instance.name
10643     if nodelist:
10644       feedback_fn("Removing old exports for instance %s" % iname)
10645       exportlist = self.rpc.call_export_list(nodelist)
10646       for node in exportlist:
10647         if exportlist[node].fail_msg:
10648           continue
10649         if iname in exportlist[node].payload:
10650           msg = self.rpc.call_export_remove(node, iname).fail_msg
10651           if msg:
10652             self.LogWarning("Could not remove older export for instance %s"
10653                             " on node %s: %s", iname, node, msg)
10654
10655   def Exec(self, feedback_fn):
10656     """Export an instance to an image in the cluster.
10657
10658     """
10659     assert self.op.mode in constants.EXPORT_MODES
10660
10661     instance = self.instance
10662     src_node = instance.primary_node
10663
10664     if self.op.shutdown:
10665       # shutdown the instance, but not the disks
10666       feedback_fn("Shutting down instance %s" % instance.name)
10667       result = self.rpc.call_instance_shutdown(src_node, instance,
10668                                                self.op.shutdown_timeout)
10669       # TODO: Maybe ignore failures if ignore_remove_failures is set
10670       result.Raise("Could not shutdown instance %s on"
10671                    " node %s" % (instance.name, src_node))
10672
10673     # set the disks ID correctly since call_instance_start needs the
10674     # correct drbd minor to create the symlinks
10675     for disk in instance.disks:
10676       self.cfg.SetDiskID(disk, src_node)
10677
10678     activate_disks = (not instance.admin_up)
10679
10680     if activate_disks:
10681       # Activate the instance disks if we'exporting a stopped instance
10682       feedback_fn("Activating disks for %s" % instance.name)
10683       _StartInstanceDisks(self, instance, None)
10684
10685     try:
10686       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
10687                                                      instance)
10688
10689       helper.CreateSnapshots()
10690       try:
10691         if (self.op.shutdown and instance.admin_up and
10692             not self.op.remove_instance):
10693           assert not activate_disks
10694           feedback_fn("Starting instance %s" % instance.name)
10695           result = self.rpc.call_instance_start(src_node, instance, None, None)
10696           msg = result.fail_msg
10697           if msg:
10698             feedback_fn("Failed to start instance: %s" % msg)
10699             _ShutdownInstanceDisks(self, instance)
10700             raise errors.OpExecError("Could not start instance: %s" % msg)
10701
10702         if self.op.mode == constants.EXPORT_MODE_LOCAL:
10703           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
10704         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10705           connect_timeout = constants.RIE_CONNECT_TIMEOUT
10706           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10707
10708           (key_name, _, _) = self.x509_key_name
10709
10710           dest_ca_pem = \
10711             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
10712                                             self.dest_x509_ca)
10713
10714           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
10715                                                      key_name, dest_ca_pem,
10716                                                      timeouts)
10717       finally:
10718         helper.Cleanup()
10719
10720       # Check for backwards compatibility
10721       assert len(dresults) == len(instance.disks)
10722       assert compat.all(isinstance(i, bool) for i in dresults), \
10723              "Not all results are boolean: %r" % dresults
10724
10725     finally:
10726       if activate_disks:
10727         feedback_fn("Deactivating disks for %s" % instance.name)
10728         _ShutdownInstanceDisks(self, instance)
10729
10730     if not (compat.all(dresults) and fin_resu):
10731       failures = []
10732       if not fin_resu:
10733         failures.append("export finalization")
10734       if not compat.all(dresults):
10735         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
10736                                if not dsk)
10737         failures.append("disk export: disk(s) %s" % fdsk)
10738
10739       raise errors.OpExecError("Export failed, errors in %s" %
10740                                utils.CommaJoin(failures))
10741
10742     # At this point, the export was successful, we can cleanup/finish
10743
10744     # Remove instance if requested
10745     if self.op.remove_instance:
10746       feedback_fn("Removing instance %s" % instance.name)
10747       _RemoveInstance(self, feedback_fn, instance,
10748                       self.op.ignore_remove_failures)
10749
10750     if self.op.mode == constants.EXPORT_MODE_LOCAL:
10751       self._CleanupExports(feedback_fn)
10752
10753     return fin_resu, dresults
10754
10755
class LUBackupRemove(NoHooksLU):
  """Remove exports related to the named instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declares locks on all nodes.

    """
    # We need all nodes to be locked in order for RemoveExport to work, but we
    # don't need to lock the instance itself, as nothing will happen to it (and
    # we can remove exports also for a removed instance)
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    """Remove any export.

    Every locked node is asked for its exports and the export of the
    instance is removed wherever it is found. Nodes which cannot be queried
    are skipped with a warning; a failed removal is logged as an error.

    @param feedback_fn: function used to tell the user that the export was
        not found under a name which could not be expanded

    """
    expanded_name = self.cfg.ExpandInstanceName(self.op.instance_name)
    # If the instance was not found we'll try with the name that was passed in.
    # This will only work if it was an FQDN, though.
    fqdn_warn = not expanded_name
    if fqdn_warn:
      instance_name = self.op.instance_name
    else:
      instance_name = expanded_name

    owned_nodes = self.glm.list_owned(locking.LEVEL_NODE)
    exportlist = self.rpc.call_export_list(owned_nodes)

    found = False
    for node in exportlist:
      query_msg = exportlist[node].fail_msg
      if query_msg:
        self.LogWarning("Failed to query node %s (continuing): %s", node,
                        query_msg)
      elif instance_name in exportlist[node].payload:
        found = True
        remove_msg = self.rpc.call_export_remove(node, instance_name).fail_msg
        if remove_msg:
          logging.error("Could not remove export for instance %s"
                        " on node %s: %s", instance_name, node, remove_msg)

    if fqdn_warn and not found:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
10801
10802
class LUGroupAdd(LogicalUnit):
  """Logical unit for creating node groups.

  """
  HPATH = "group-add"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Generates the UUID of the new group and requests a lock for it.

    """
    # We need the new group's UUID here so that we can create and acquire the
    # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
    # that it should not check whether the UUID exists in the configuration.
    new_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
    self.group_uuid = new_uuid
    self.needed_locks = {}
    self.add_locks[locking.LEVEL_NODEGROUP] = new_uuid

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the given group name is not an existing node group
    already, and enforces the types of the node parameters, if any.

    @raise errors.OpPrereqError: if the group name is already in use

    """
    # the lookup failing with OpPrereqError means the name is still free
    name_taken = True
    try:
      existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    except errors.OpPrereqError:
      name_taken = False

    if name_taken:
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
                                 " node group (UUID: %s)" %
                                 (self.op.group_name, existing_uuid),
                                 errors.ECODE_EXISTS)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: dict
    @return: environment holding just the name of the new group

    """
    env = {}
    env["GROUP_NAME"] = self.op.group_name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: two lists, each holding only the master node

    """
    master = self.cfg.GetMasterNode()
    return ([master], [master])

  def Exec(self, feedback_fn):
    """Add the node group to the cluster.

    """
    new_group = objects.NodeGroup(name=self.op.group_name, members=[],
                                  uuid=self.group_uuid,
                                  alloc_policy=self.op.alloc_policy,
                                  ndparams=self.op.ndparams)

    # the UUID was generated in ExpandNames, hence no check for it here
    self.cfg.AddNodeGroup(new_group, self.proc.GetECId(), check_uuid=False)
    del self.remove_locks[locking.LEVEL_NODEGROUP]
10865
10866
class LUGroupAssignNodes(NoHooksLU):
  """Logical unit moving a set of nodes into a node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Resolve the names given in the opcode and declare the locks.

    """
    # Both lookups raise errors.OpPrereqError on their own
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)

    # All affected nodes and groups get locked. The nodes and the
    # *destination* group are known already; the "source" groups have to be
    # looked up in the node information.
    self.node_data = self.cfg.GetAllNodesInfo()

    source_groups = [self.node_data[name].group for name in self.op.nodes]
    lock_groups = set(source_groups)
    lock_groups.add(self.group_uuid)

    self.needed_locks = {
      locking.LEVEL_NODE: self.op.nodes,
      locking.LEVEL_NODEGROUP: list(lock_groups),
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Makes sure the destination group can be retrieved and computes which
    instances would end up split across groups. Newly split instances
    abort the operation unless the opcode's force flag is set.

    """
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    instance_data = self.cfg.GetAllInstancesInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    assignments = [(node, self.group_uuid) for node in self.op.nodes]
    (new_splits, previous_splits) = \
      self.CheckAssignmentForSplitInstances(assignments, self.node_data,
                                            instance_data)

    if not new_splits:
      # Nothing gets newly split, hence nothing to complain about
      return

    fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))

    if not self.op.force:
      raise errors.OpExecError("The following instances get split by this"
                               " change and --force was not given: %s" %
                               fmt_new_splits)

    self.LogWarning("This operation will split the following instances: %s",
                    fmt_new_splits)

    if previous_splits:
      self.LogWarning("In addition, these already-split instances continue"
                      " to be split across groups: %s",
                      utils.CommaJoin(utils.NiceSort(previous_splits)))

  def Exec(self, feedback_fn):
    """Point every requested node at the destination group.

    """
    target_uuid = self.group_uuid

    for node_name in self.op.nodes:
      self.node_data[node_name].group = target_uuid

    # The original author notes that this call saves all modified nodes
    self.cfg.Update(self.group, feedback_fn)

  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Compute which instances are split after a set of node assignments.

    All the given assignments are treated as one atomic operation. An
    instance counts as split when its primary and secondary nodes do not
    all belong to the same group. Instances whose disk template is not
    listed in constants.DTS_INT_MIRROR are ignored.

    @type changes: list of (node_name, new_group_uuid) pairs
    @param changes: the node assignments to evaluate
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list with the names of the instances that are not split now
      but become split through the changes, and a list with the names of
      the instances that are split now and remain split afterwards

    """
    # Assignments to the group a node already belongs to change nothing
    effective = {}
    for (node_name, new_group) in changes:
      if node_data[node_name].group != new_group:
        effective[node_name] = new_group

    split_after = set()
    split_before = set()

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        continue

      inst_nodes = [inst.primary_node]
      inst_nodes.extend(inst.secondary_nodes)

      groups_now = [node_data[name].group for name in inst_nodes]
      if len(set(groups_now)) > 1:
        split_before.add(inst.name)

      groups_then = [effective.get(name, node_data[name].group)
                     for name in inst_nodes]
      if len(set(groups_then)) > 1:
        split_after.add(inst.name)

    return (list(split_after - split_before),
            list(split_before & split_after))
10979
10980
class _GroupQuery(_QueryBase):
  """Query implementation for node groups.

  """
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    """Compute the UUIDs of the wanted groups; no locks are requested.

    Stores all groups in self._all_groups and the UUIDs to report, in
    output order, in self.wanted.

    @param lu: the logical unit on whose behalf the query runs
    @raise errors.OpPrereqError: if a requested item is neither the name
      nor the UUID of an existing group

    """
    lu.needed_locks = {}

    groups = lu.cfg.GetAllNodeGroupsInfo()
    self._all_groups = groups

    uuid_by_name = {}
    for grp in groups.values():
      uuid_by_name[grp.name] = grp.uuid

    if not self.names:
      # No explicit selection: all groups, ordered by name
      sorted_names = utils.NiceSort(uuid_by_name.keys())
      self.wanted = [uuid_by_name[name] for name in sorted_names]
      return

    # Every requested item may be either a group name or a group UUID;
    # UUIDs take precedence
    known_uuids = frozenset(groups.keys())
    unknown = []
    self.wanted = []

    for item in self.names:
      if item in known_uuids:
        self.wanted.append(item)
      elif item in uuid_by_name:
        self.wanted.append(uuid_by_name[item])
      else:
        unknown.append(item)

    if unknown:
      raise errors.OpPrereqError("Some groups do not exist: %s" %
                                 utils.CommaJoin(unknown),
                                 errors.ECODE_NOENT)

  def DeclareLocks(self, lu, level):
    """Nothing to declare.

    """
    pass

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    The group->[nodes] mapping is only computed for query.GQ_NODE and the
    group->[instances] mapping only for query.GQ_INST; a mapping which was
    not requested is passed on as None.

    @param lu: the logical unit on whose behalf the query runs
    @return: the query.GroupQueryData object for the wanted groups

    """
    requested = self.requested_data
    want_nodes = query.GQ_NODE in requested
    want_instances = query.GQ_INST in requested

    nodes_by_group = None
    instances_by_group = None

    # Instances are mapped to groups via their primary node, hence the nodes
    # have to be processed even if only instance information was requested
    if want_nodes or want_instances:
      all_nodes = lu.cfg.GetAllNodesInfo()
      nodes_by_group = dict((uuid, []) for uuid in self.wanted)
      group_of_node = {}

      for node in all_nodes.values():
        owner = node.group
        if owner in nodes_by_group:
          nodes_by_group[owner].append(node.name)
          group_of_node[node.name] = owner

      if want_instances:
        all_instances = lu.cfg.GetAllInstancesInfo()
        instances_by_group = dict((uuid, []) for uuid in self.wanted)

        for inst in all_instances.values():
          pnode = inst.primary_node
          if pnode in group_of_node:
            instances_by_group[group_of_node[pnode]].append(inst.name)

        if not want_nodes:
          # The node mapping was only a helper, do not pass it on
          nodes_by_group = None

    wanted_groups = [self._all_groups[uuid] for uuid in self.wanted]

    return query.GroupQueryData(wanted_groups, nodes_by_group,
                                instances_by_group)
11056
11057
class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  All the work is delegated to a L{_GroupQuery} object.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Build the query object from the opcode parameters.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.gq = _GroupQuery(name_filter, self.op.output_fields, False)

  def ExpandNames(self):
    """Let the query object expand names and declare its locks.

    """
    query_obj = self.gq
    query_obj.ExpandNames(self)

  def Exec(self, feedback_fn):
    """Run the query and return its old-style result.

    """
    query_obj = self.gq
    return query_obj.OldStyleQuery(self)
11073
11074
class LUGroupSetParams(LogicalUnit):
  """Modifies the parameters of a node group.

  The node parameters (ndparams) and the allocation policy of the group
  can be changed; at least one of the two has to be given.

  """
  HPATH = "group-modify"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def CheckArguments(self):
    """Make sure at least one modification was requested.

    @raise errors.OpPrereqError: if neither ndparams nor alloc_policy was
      passed

    """
    all_changes = [
      self.op.ndparams,
      self.op.alloc_policy,
      ]

    if all_changes.count(None) == len(all_changes):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Look up the group UUID and lock the group.

    """
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Retrieves the group object and, if node parameters were passed,
    computes and type-checks the resulting parameter dict, which is kept
    in self.new_ndparams for Exec.

    @raise errors.OpExecError: if the group can not be retrieved

    """
    self.group = self.cfg.GetNodeGroup(self.group_uuid)

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
      # Enforce the types on the merged dict, as this is the one Exec stores
      # in the group; checking only self.op.ndparams after the merge leaves
      # the stored values unchecked
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: dict
    @return: the group name and the requested allocation policy

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: the master node, both as pre- and as post-hook node list

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the node group.

    @rtype: list
    @return: (parameter name, new value) pairs; only filled for ndparams

    """
    result = []

    if self.op.ndparams:
      self.group.ndparams = self.new_ndparams
      result.append(("ndparams", str(self.group.ndparams)))

    if self.op.alloc_policy:
      self.group.alloc_policy = self.op.alloc_policy

    self.cfg.Update(self.group, feedback_fn)
    return result
11147
11148
11149
class LUGroupRemove(LogicalUnit):
  """Logical unit removing an empty node group from the cluster.

  """
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Look up the group UUID and lock the group.

    """
    # The lookup raises errors.OpPrereqError on its own
    group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.group_uuid = group_uuid
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    The group must not contain any node and it must not be the only node
    group of the cluster.

    @raise errors.OpPrereqError: if one of the two conditions is not met

    """
    # The group has to be empty
    members = []
    for node in self.cfg.GetAllNodesInfo().values():
      if node.group == self.group_uuid:
        members.append(node.name)

    if members:
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name,
                                  utils.CommaJoin(utils.NiceSort(members))),
                                 errors.ECODE_STATE)

    # The cluster must not be left without any group
    group_count = len(self.cfg.GetNodeGroupList())
    if group_count == 1:
      raise errors.OpPrereqError("Group '%s' is the only group,"
                                 " cannot be removed" %
                                 self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: dict
    @return: the name of the group being removed

    """
    env = {
      "GROUP_NAME": self.op.group_name,
      }
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: the master node, both as pre- and as post-hook node list

    """
    master = self.cfg.GetMasterNode()
    return ([master], [master])

  def Exec(self, feedback_fn):
    """Remove the node group.

    @raise errors.OpExecError: if the configuration reports an error while
      removing the group

    """
    group_uuid = self.group_uuid

    try:
      self.cfg.RemoveNodeGroup(group_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, group_uuid))

    # Register the lock of the removed group for removal
    self.remove_locks[locking.LEVEL_NODEGROUP] = group_uuid
11215
11216
class LUGroupRename(LogicalUnit):
  """Logical unit changing the name of a node group.

  """
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Look up the group UUID and lock the group.

    """
    # The lookup raises errors.OpPrereqError on its own
    group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.group_uuid = group_uuid

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Ensures requested new name is not yet used.

    @raise errors.OpPrereqError: if a group with the new name exists

    """
    try:
      clashing_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      # The lookup failed, hence the new name is still available
      return

    raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                               " node group (UUID: %s)" %
                               (self.op.new_name, clashing_uuid),
                               errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: dict
    @return: the current and the requested name of the group

    """
    env = {
      "OLD_NAME": self.op.group_name,
      "NEW_NAME": self.op.new_name,
      }
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: the master node followed by the other nodes of the group; the
      same list is used for pre- and post-hooks

    """
    master = self.cfg.GetMasterNode()

    candidates = self.cfg.GetAllNodesInfo()
    # The master node heads the list, do not add it a second time
    candidates.pop(master, None)

    run_nodes = [master]
    for node in candidates.values():
      if node.group == self.group_uuid:
        run_nodes.append(node.name)

    return (run_nodes, run_nodes)

  def Exec(self, feedback_fn):
    """Rename the node group.

    @return: the new name of the group
    @raise errors.OpExecError: if the group can not be retrieved

    """
    new_name = self.op.new_name
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    group.name = new_name
    self.cfg.Update(group, feedback_fn)

    return new_name
11284
11285
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  Abstract base class for the LUs working on the tags of a single object
  (LUTagsGet, LUTagsSet and LUTagsDel).

  """
  def ExpandNames(self):
    """Expand the object name and lock nodes or instances.

    For node groups only the UUID is looked up, no lock is declared.

    """
    kind = self.op.kind
    self.group_uuid = None
    self.needed_locks = {}

    if kind == constants.TAG_NODE:
      node_name = _ExpandNodeName(self.cfg, self.op.name)
      self.op.name = node_name
      self.needed_locks[locking.LEVEL_NODE] = node_name
    elif kind == constants.TAG_INSTANCE:
      instance_name = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = instance_name
      self.needed_locks[locking.LEVEL_INSTANCE] = instance_name
    elif kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    Stores the object carrying the tags in self.target.

    @raise errors.OpPrereqError: if the tag kind is not known

    """
    cfg = self.cfg
    kind = self.op.kind

    if kind == constants.TAG_CLUSTER:
      target = cfg.GetClusterInfo()
    elif kind == constants.TAG_NODE:
      target = cfg.GetNodeInfo(self.op.name)
    elif kind == constants.TAG_INSTANCE:
      target = cfg.GetInstanceInfo(self.op.name)
    elif kind == constants.TAG_NODEGROUP:
      target = cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(kind), errors.ECODE_INVAL)

    self.target = target
11322
11323
class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names as the parent class does, but share all locks.

    """
    TagsLU.ExpandNames(self)

    # Reading tags does not modify anything, so every lock level is shared
    self.share_locks = dict((level, 1) for level in locking.LEVELS)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    @rtype: list
    @return: the tags of the target object

    """
    tags = self.target.GetTags()
    return list(tags)
11341
11342
11343 class LUTagsSearch(NoHooksLU):
11344   """Searches the tags for a given pattern.
11345
11346   """
11347   REQ_BGL = False
11348
11349   def ExpandNames(self):
11350     self.needed_locks = {}
11351
11352   def CheckPrereq(self):
11353     """Check prerequisites.
11354
11355     This checks the pattern passed for validity by compiling it.
11356
11357     """
11358     try:
11359       self.re = re.compile(self.op.pattern)
11360     except re.error, err:
11361       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
11362                                  (self.op.pattern, err), errors.ECODE_INVAL)
11363
11364   def Exec(self, feedback_fn):
11365     """Returns the tag list.
11366
11367     """
11368     cfg = self.cfg
11369     tgts = [("/cluster", cfg.GetClusterInfo())]
11370     ilist = cfg.GetAllInstancesInfo().values()
11371     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
11372     nlist = cfg.GetAllNodesInfo().values()
11373     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
11374     tgts.extend(("/nodegroup/%s" % n.name, n)
11375                 for n in cfg.GetAllNodeGroupsInfo().values())
11376     results = []
11377     for path, target in tgts:
11378       for tag in target.GetTags():
11379         if self.re.search(tag):
11380           results.append((path, tag))
11381     return results
11382
11383
11384 class LUTagsSet(TagsLU):
11385   """Sets a tag on a given object.
11386
11387   """
11388   REQ_BGL = False
11389
11390   def CheckPrereq(self):
11391     """Check prerequisites.
11392
11393     This checks the type and length of the tag name and value.
11394
11395     """
11396     TagsLU.CheckPrereq(self)
11397     for tag in self.op.tags:
11398       objects.TaggableObject.ValidateTag(tag)
11399
11400   def Exec(self, feedback_fn):
11401     """Sets the tag.
11402
11403     """
11404     try:
11405       for tag in self.op.tags:
11406         self.target.AddTag(tag)
11407     except errors.TagError, err:
11408       raise errors.OpExecError("Error while setting tag: %s" % str(err))
11409     self.cfg.Update(self.target, feedback_fn)
11410
11411
class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    Validates the given tags and verifies that the target object has all
    of them.

    @raise errors.OpPrereqError: if a tag is not set on the object

    """
    TagsLU.CheckPrereq(self)

    validate = objects.TaggableObject.ValidateTag
    for tag in self.op.tags:
      validate(tag)

    requested = frozenset(self.op.tags)
    present = self.target.GetTags()

    absent = requested - present
    if not absent:
      return

    quoted = ["'%s'" % name for name in sorted(absent)]
    raise errors.OpPrereqError("Tag(s) %s not found" %
                               (utils.CommaJoin(quoted), ),
                               errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    """
    target = self.target

    for tag in self.op.tags:
      target.RemoveTag(tag)

    self.cfg.Update(target, feedback_fn)
11444
11445
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    This expands the node list, if any.

    """
    self.needed_locks = {}

    if not self.op.on_nodes:
      return

    # _GetWantedNodes can be used here, but is not always appropriate to use
    # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
    # more information.
    node_names = _GetWantedNodes(self, self.op.on_nodes)
    self.op.on_nodes = node_names
    self.needed_locks[locking.LEVEL_NODE] = node_names

  def _TestDelay(self):
    """Do the actual sleep.

    @raise errors.OpExecError: if the delay on the master fails

    """
    duration = self.op.duration

    if self.op.on_master and not utils.TestDelay(duration):
      raise errors.OpExecError("Error during master delay test")

    if self.op.on_nodes:
      replies = self.rpc.call_test_delay(self.op.on_nodes, duration)
      for (node_name, reply) in replies.items():
        reply.Raise("Failure during rpc call to node %s" % node_name)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    A repeat count of zero means a single delay without iteration log
    messages.

    """
    repeat = self.op.repeat

    if repeat == 0:
      self._TestDelay()
      return

    last_index = repeat - 1
    for index in range(repeat):
      self.LogInfo("Test delay iteration %d/%d" % (index, last_index))
      self._TestDelay()
11492
11493
11494 class LUTestJqueue(NoHooksLU):
11495   """Utility LU to test some aspects of the job queue.
11496
11497   """
11498   REQ_BGL = False
11499
11500   # Must be lower than default timeout for WaitForJobChange to see whether it
11501   # notices changed jobs
11502   _CLIENT_CONNECT_TIMEOUT = 20.0
11503   _CLIENT_CONFIRM_TIMEOUT = 60.0
11504
11505   @classmethod
11506   def _NotifyUsingSocket(cls, cb, errcls):
11507     """Opens a Unix socket and waits for another program to connect.
11508
11509     @type cb: callable
11510     @param cb: Callback to send socket name to client
11511     @type errcls: class
11512     @param errcls: Exception class to use for errors
11513
11514     """
11515     # Using a temporary directory as there's no easy way to create temporary
11516     # sockets without writing a custom loop around tempfile.mktemp and
11517     # socket.bind
11518     tmpdir = tempfile.mkdtemp()
11519     try:
11520       tmpsock = utils.PathJoin(tmpdir, "sock")
11521
11522       logging.debug("Creating temporary socket at %s", tmpsock)
11523       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
11524       try:
11525         sock.bind(tmpsock)
11526         sock.listen(1)
11527
11528         # Send details to client
11529         cb(tmpsock)
11530
11531         # Wait for client to connect before continuing
11532         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
11533         try:
11534           (conn, _) = sock.accept()
11535         except socket.error, err:
11536           raise errcls("Client didn't connect in time (%s)" % err)
11537       finally:
11538         sock.close()
11539     finally:
11540       # Remove as soon as client is connected
11541       shutil.rmtree(tmpdir)
11542
11543     # Wait for client to close
11544     try:
11545       try:
11546         # pylint: disable-msg=E1101
11547         # Instance of '_socketobject' has no ... member
11548         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
11549         conn.recv(1)
11550       except socket.error, err:
11551         raise errcls("Client failed to confirm notification (%s)" % err)
11552     finally:
11553       conn.close()
11554
11555   def _SendNotification(self, test, arg, sockname):
11556     """Sends a notification to the client.
11557
11558     @type test: string
11559     @param test: Test name
11560     @param arg: Test argument (depends on test)
11561     @type sockname: string
11562     @param sockname: Socket path
11563
11564     """
11565     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
11566
11567   def _Notify(self, prereq, test, arg):
11568     """Notifies the client of a test.
11569
11570     @type prereq: bool
11571     @param prereq: Whether this is a prereq-phase test
11572     @type test: string
11573     @param test: Test name
11574     @param arg: Test argument (depends on test)
11575
11576     """
11577     if prereq:
11578       errcls = errors.OpPrereqError
11579     else:
11580       errcls = errors.OpExecError
11581
11582     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
11583                                                   test, arg),
11584                                    errcls)
11585
11586   def CheckArguments(self):
11587     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
11588     self.expandnames_calls = 0
11589
11590   def ExpandNames(self):
11591     checkargs_calls = getattr(self, "checkargs_calls", 0)
11592     if checkargs_calls < 1:
11593       raise errors.ProgrammerError("CheckArguments was not called")
11594
11595     self.expandnames_calls += 1
11596
11597     if self.op.notify_waitlock:
11598       self._Notify(True, constants.JQT_EXPANDNAMES, None)
11599
11600     self.LogInfo("Expanding names")
11601
11602     # Get lock on master node (just to get a lock, not for a particular reason)
11603     self.needed_locks = {
11604       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
11605       }
11606
11607   def Exec(self, feedback_fn):
11608     if self.expandnames_calls < 1:
11609       raise errors.ProgrammerError("ExpandNames was not called")
11610
11611     if self.op.notify_exec:
11612       self._Notify(False, constants.JQT_EXEC, None)
11613
11614     self.LogInfo("Executing")
11615
11616     if self.op.log_messages:
11617       self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
11618       for idx, msg in enumerate(self.op.log_messages):
11619         self.LogInfo("Sending log message %s", idx + 1)
11620         feedback_fn(constants.JQT_MSGPREFIX + msg)
11621         # Report how many test messages have been sent
11622         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
11623
11624     if self.op.fail:
11625       raise errors.OpExecError("Opcode failure was requested")
11626
11627     return True
11628
11629
11630 class IAllocator(object):
11631   """IAllocator framework.
11632
  An IAllocator instance has four sets of attributes:
    - cfg that is needed to query the cluster
    - input data (all members of the mode-specific _*_KEYS class attribute
      are required)
    - four buffer attributes (in_text, in_data, out_text, out_data), that
      represent the input (to the external script) in text and data
      structure format, and the output from it, again in two formats
    - the result variables from the script (success, info, result) for
      easy usage
11641
11642   """
11643   # pylint: disable-msg=R0902
11644   # lots of instance attributes
11645   _ALLO_KEYS = [
11646     "name", "mem_size", "disks", "disk_template",
11647     "os", "tags", "nics", "vcpus", "hypervisor",
11648     ]
11649   _RELO_KEYS = [
11650     "name", "relocate_from",
11651     ]
11652   _EVAC_KEYS = [
11653     "evac_nodes",
11654     ]
11655
11656   def __init__(self, cfg, rpc, mode, **kwargs):
11657     self.cfg = cfg
11658     self.rpc = rpc
11659     # init buffer variables
11660     self.in_text = self.out_text = self.in_data = self.out_data = None
11661     # init all input fields so that pylint is happy
11662     self.mode = mode
11663     self.mem_size = self.disks = self.disk_template = None
11664     self.os = self.tags = self.nics = self.vcpus = None
11665     self.hypervisor = None
11666     self.relocate_from = None
11667     self.name = None
11668     self.evac_nodes = None
11669     # computed fields
11670     self.required_nodes = None
11671     # init result fields
11672     self.success = self.info = self.result = None
11673     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
11674       keyset = self._ALLO_KEYS
11675       fn = self._AddNewInstance
11676     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
11677       keyset = self._RELO_KEYS
11678       fn = self._AddRelocateInstance
11679     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
11680       keyset = self._EVAC_KEYS
11681       fn = self._AddEvacuateNodes
11682     else:
11683       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
11684                                    " IAllocator" % self.mode)
11685     for key in kwargs:
11686       if key not in keyset:
11687         raise errors.ProgrammerError("Invalid input parameter '%s' to"
11688                                      " IAllocator" % key)
11689       setattr(self, key, kwargs[key])
11690
11691     for key in keyset:
11692       if key not in kwargs:
11693         raise errors.ProgrammerError("Missing input parameter '%s' to"
11694                                      " IAllocator" % key)
11695     self._BuildInputData(fn)
11696
11697   def _ComputeClusterData(self):
11698     """Compute the generic allocator input data.
11699
11700     This is the data that is independent of the actual operation.
11701
11702     """
11703     cfg = self.cfg
11704     cluster_info = cfg.GetClusterInfo()
11705     # cluster data
11706     data = {
11707       "version": constants.IALLOCATOR_VERSION,
11708       "cluster_name": cfg.GetClusterName(),
11709       "cluster_tags": list(cluster_info.GetTags()),
11710       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
11711       # we don't have job IDs
11712       }
11713     ninfo = cfg.GetAllNodesInfo()
11714     iinfo = cfg.GetAllInstancesInfo().values()
11715     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
11716
11717     # node data
11718     node_list = [n.name for n in ninfo.values() if n.vm_capable]
11719
11720     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
11721       hypervisor_name = self.hypervisor
11722     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
11723       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
11724     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
11725       hypervisor_name = cluster_info.enabled_hypervisors[0]
11726
11727     node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
11728                                         hypervisor_name)
11729     node_iinfo = \
11730       self.rpc.call_all_instances_info(node_list,
11731                                        cluster_info.enabled_hypervisors)
11732
11733     data["nodegroups"] = self._ComputeNodeGroupData(cfg)
11734
11735     config_ndata = self._ComputeBasicNodeData(ninfo)
11736     data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
11737                                                  i_list, config_ndata)
11738     assert len(data["nodes"]) == len(ninfo), \
11739         "Incomplete node data computed"
11740
11741     data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
11742
11743     self.in_data = data
11744
11745   @staticmethod
11746   def _ComputeNodeGroupData(cfg):
11747     """Compute node groups data.
11748
11749     """
11750     ng = {}
11751     for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
11752       ng[guuid] = {
11753         "name": gdata.name,
11754         "alloc_policy": gdata.alloc_policy,
11755         }
11756     return ng
11757
11758   @staticmethod
11759   def _ComputeBasicNodeData(node_cfg):
11760     """Compute global node data.
11761
11762     @rtype: dict
11763     @returns: a dict of name: (node dict, node config)
11764
11765     """
11766     node_results = {}
11767     for ninfo in node_cfg.values():
11768       # fill in static (config-based) values
11769       pnr = {
11770         "tags": list(ninfo.GetTags()),
11771         "primary_ip": ninfo.primary_ip,
11772         "secondary_ip": ninfo.secondary_ip,
11773         "offline": ninfo.offline,
11774         "drained": ninfo.drained,
11775         "master_candidate": ninfo.master_candidate,
11776         "group": ninfo.group,
11777         "master_capable": ninfo.master_capable,
11778         "vm_capable": ninfo.vm_capable,
11779         }
11780
11781       node_results[ninfo.name] = pnr
11782
11783     return node_results
11784
11785   @staticmethod
11786   def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
11787                               node_results):
11788     """Compute global node data.
11789
11790     @param node_results: the basic node structures as filled from the config
11791
11792     """
11793     # make a copy of the current dict
11794     node_results = dict(node_results)
11795     for nname, nresult in node_data.items():
11796       assert nname in node_results, "Missing basic data for node %s" % nname
11797       ninfo = node_cfg[nname]
11798
11799       if not (ninfo.offline or ninfo.drained):
11800         nresult.Raise("Can't get data for node %s" % nname)
11801         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
11802                                 nname)
11803         remote_info = nresult.payload
11804
11805         for attr in ['memory_total', 'memory_free', 'memory_dom0',
11806                      'vg_size', 'vg_free', 'cpu_total']:
11807           if attr not in remote_info:
11808             raise errors.OpExecError("Node '%s' didn't return attribute"
11809                                      " '%s'" % (nname, attr))
11810           if not isinstance(remote_info[attr], int):
11811             raise errors.OpExecError("Node '%s' returned invalid value"
11812                                      " for '%s': %s" %
11813                                      (nname, attr, remote_info[attr]))
11814         # compute memory used by primary instances
11815         i_p_mem = i_p_up_mem = 0
11816         for iinfo, beinfo in i_list:
11817           if iinfo.primary_node == nname:
11818             i_p_mem += beinfo[constants.BE_MEMORY]
11819             if iinfo.name not in node_iinfo[nname].payload:
11820               i_used_mem = 0
11821             else:
11822               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
11823             i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
11824             remote_info['memory_free'] -= max(0, i_mem_diff)
11825
11826             if iinfo.admin_up:
11827               i_p_up_mem += beinfo[constants.BE_MEMORY]
11828
11829         # compute memory used by instances
11830         pnr_dyn = {
11831           "total_memory": remote_info['memory_total'],
11832           "reserved_memory": remote_info['memory_dom0'],
11833           "free_memory": remote_info['memory_free'],
11834           "total_disk": remote_info['vg_size'],
11835           "free_disk": remote_info['vg_free'],
11836           "total_cpus": remote_info['cpu_total'],
11837           "i_pri_memory": i_p_mem,
11838           "i_pri_up_memory": i_p_up_mem,
11839           }
11840         pnr_dyn.update(node_results[nname])
11841         node_results[nname] = pnr_dyn
11842
11843     return node_results
11844
11845   @staticmethod
11846   def _ComputeInstanceData(cluster_info, i_list):
11847     """Compute global instance data.
11848
11849     """
11850     instance_data = {}
11851     for iinfo, beinfo in i_list:
11852       nic_data = []
11853       for nic in iinfo.nics:
11854         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
11855         nic_dict = {"mac": nic.mac,
11856                     "ip": nic.ip,
11857                     "mode": filled_params[constants.NIC_MODE],
11858                     "link": filled_params[constants.NIC_LINK],
11859                    }
11860         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
11861           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
11862         nic_data.append(nic_dict)
11863       pir = {
11864         "tags": list(iinfo.GetTags()),
11865         "admin_up": iinfo.admin_up,
11866         "vcpus": beinfo[constants.BE_VCPUS],
11867         "memory": beinfo[constants.BE_MEMORY],
11868         "os": iinfo.os,
11869         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
11870         "nics": nic_data,
11871         "disks": [{constants.IDISK_SIZE: dsk.size,
11872                    constants.IDISK_MODE: dsk.mode}
11873                   for dsk in iinfo.disks],
11874         "disk_template": iinfo.disk_template,
11875         "hypervisor": iinfo.hypervisor,
11876         }
11877       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
11878                                                  pir["disks"])
11879       instance_data[iinfo.name] = pir
11880
11881     return instance_data
11882
11883   def _AddNewInstance(self):
11884     """Add new instance data to allocator structure.
11885
11886     This in combination with _AllocatorGetClusterData will create the
11887     correct structure needed as input for the allocator.
11888
11889     The checks for the completeness of the opcode must have already been
11890     done.
11891
11892     """
11893     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
11894
11895     if self.disk_template in constants.DTS_INT_MIRROR:
11896       self.required_nodes = 2
11897     else:
11898       self.required_nodes = 1
11899     request = {
11900       "name": self.name,
11901       "disk_template": self.disk_template,
11902       "tags": self.tags,
11903       "os": self.os,
11904       "vcpus": self.vcpus,
11905       "memory": self.mem_size,
11906       "disks": self.disks,
11907       "disk_space_total": disk_space,
11908       "nics": self.nics,
11909       "required_nodes": self.required_nodes,
11910       }
11911     return request
11912
11913   def _AddRelocateInstance(self):
11914     """Add relocate instance data to allocator structure.
11915
11916     This in combination with _IAllocatorGetClusterData will create the
11917     correct structure needed as input for the allocator.
11918
11919     The checks for the completeness of the opcode must have already been
11920     done.
11921
11922     """
11923     instance = self.cfg.GetInstanceInfo(self.name)
11924     if instance is None:
11925       raise errors.ProgrammerError("Unknown instance '%s' passed to"
11926                                    " IAllocator" % self.name)
11927
11928     if instance.disk_template not in constants.DTS_MIRRORED:
11929       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
11930                                  errors.ECODE_INVAL)
11931
11932     if instance.disk_template in constants.DTS_INT_MIRROR and \
11933         len(instance.secondary_nodes) != 1:
11934       raise errors.OpPrereqError("Instance has not exactly one secondary node",
11935                                  errors.ECODE_STATE)
11936
11937     self.required_nodes = 1
11938     disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
11939     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
11940
11941     request = {
11942       "name": self.name,
11943       "disk_space_total": disk_space,
11944       "required_nodes": self.required_nodes,
11945       "relocate_from": self.relocate_from,
11946       }
11947     return request
11948
11949   def _AddEvacuateNodes(self):
11950     """Add evacuate nodes data to allocator structure.
11951
11952     """
11953     request = {
11954       "evac_nodes": self.evac_nodes
11955       }
11956     return request
11957
11958   def _BuildInputData(self, fn):
11959     """Build input data structures.
11960
11961     """
11962     self._ComputeClusterData()
11963
11964     request = fn()
11965     request["type"] = self.mode
11966     self.in_data["request"] = request
11967
11968     self.in_text = serializer.Dump(self.in_data)
11969
11970   def Run(self, name, validate=True, call_fn=None):
11971     """Run an instance allocator and return the results.
11972
11973     """
11974     if call_fn is None:
11975       call_fn = self.rpc.call_iallocator_runner
11976
11977     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
11978     result.Raise("Failure while running the iallocator script")
11979
11980     self.out_text = result.payload
11981     if validate:
11982       self._ValidateResult()
11983
11984   def _ValidateResult(self):
11985     """Process the allocator results.
11986
11987     This will process and if successful save the result in
11988     self.out_data and the other parameters.
11989
11990     """
11991     try:
11992       rdict = serializer.Load(self.out_text)
11993     except Exception, err:
11994       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
11995
11996     if not isinstance(rdict, dict):
11997       raise errors.OpExecError("Can't parse iallocator results: not a dict")
11998
11999     # TODO: remove backwards compatiblity in later versions
12000     if "nodes" in rdict and "result" not in rdict:
12001       rdict["result"] = rdict["nodes"]
12002       del rdict["nodes"]
12003
12004     for key in "success", "info", "result":
12005       if key not in rdict:
12006         raise errors.OpExecError("Can't parse iallocator results:"
12007                                  " missing key '%s'" % key)
12008       setattr(self, key, rdict[key])
12009
12010     if not isinstance(rdict["result"], list):
12011       raise errors.OpExecError("Can't parse iallocator results: 'result' key"
12012                                " is not a list")
12013
12014     if self.mode == constants.IALLOCATOR_MODE_RELOC:
12015       assert self.relocate_from is not None
12016       assert self.required_nodes == 1
12017
12018       node2group = dict((name, ndata["group"])
12019                         for (name, ndata) in self.in_data["nodes"].items())
12020
12021       fn = compat.partial(self._NodesToGroups, node2group,
12022                           self.in_data["nodegroups"])
12023
12024       request_groups = fn(self.relocate_from)
12025       result_groups = fn(rdict["result"])
12026
12027       if result_groups != request_groups:
12028         raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
12029                                  " differ from original groups (%s)" %
12030                                  (utils.CommaJoin(result_groups),
12031                                   utils.CommaJoin(request_groups)))
12032
12033     self.out_data = rdict
12034
12035   @staticmethod
12036   def _NodesToGroups(node2group, groups, nodes):
12037     """Returns a list of unique group names for a list of nodes.
12038
12039     @type node2group: dict
12040     @param node2group: Map from node name to group UUID
12041     @type groups: dict
12042     @param groups: Group information
12043     @type nodes: list
12044     @param nodes: Node names
12045
12046     """
12047     result = set()
12048
12049     for node in nodes:
12050       try:
12051         group_uuid = node2group[node]
12052       except KeyError:
12053         # Ignore unknown node
12054         pass
12055       else:
12056         try:
12057           group = groups[group_uuid]
12058         except KeyError:
12059           # Can't find group, let's use UUID
12060           group_name = group_uuid
12061         else:
12062           group_name = group["name"]
12063
12064         result.add(group_name)
12065
12066     return sorted(result)
12067
12068
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU builds the allocator input for the requested mode and returns
  either that input text or the raw output of the named allocator,
  depending on the requested direction.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and mode
    of the test.

    @raise errors.OpPrereqError: if a required opcode attribute is missing
        or malformed, the instance to allocate already exists, or the mode
        or direction is not a known one

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      # an allocation test only makes sense for a not yet existing instance
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      # every disk must be a dict with an integer size and a valid mode
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      # the instance is relocated away from its current secondary nodes
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      # an allocator is only needed when it is actually run
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    @param feedback_fn: feedback function, not used by this LU
    @return: the allocator input text for direction
        C{constants.IALLOCATOR_DIR_IN}, otherwise the unvalidated output
        text of the allocator named in the opcode
    @raise errors.ProgrammerError: if the mode is not one of the handled
        ones

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      # the mode has to be interpolated with "%"; passing it as a second
      # exception argument would leave the "%s" placeholder unformatted
      raise errors.ProgrammerError("Uncatched mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      # the raw output is wanted, hence no validation
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
12164
12165
#: Query type implementations, indexed by query resource
_QUERY_IMPL = {
  constants.QR_INSTANCE: _InstanceQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_OS: _OsQuery,
  }

# The table must cover exactly the resources listed in QR_VIA_OP
assert set(_QUERY_IMPL) == constants.QR_VIA_OP
12175
12176
def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}
  @return: the entry of L{_QUERY_IMPL} registered for C{name}
  @raise errors.OpPrereqError: if there is no entry for C{name}

  """
  if name not in _QUERY_IMPL:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)

  return _QUERY_IMPL[name]