code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable-msg=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay to many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import platform
  36 import logging
  37 import copy
  38 import OpenSSL
  39 import socket
  40 import tempfile
  41 import shutil
  42 import itertools
  43
  44 from ganeti import ssh
  45 from ganeti import utils
  46 from ganeti import errors
  47 from ganeti import hypervisor
  48 from ganeti import locking
  49 from ganeti import constants
  50 from ganeti import objects
  51 from ganeti import serializer
  52 from ganeti import ssconf
  53 from ganeti import uidpool
  54 from ganeti import compat
  55 from ganeti import masterd
  56 from ganeti import netutils
  57 from ganeti import query
  58 from ganeti import qlang
  59 from ganeti import opcodes
  60
  61 import ganeti.masterd.instance # pylint: disable-msg=W0611
  62
  63
  64 def _SupportsOob(cfg, node):
  65   """Tells if node supports OOB.
  66
  67   @type cfg: L{config.ConfigWriter}
  68   @param cfg: The cluster configuration
  69   @type node: L{objects.Node}
  70   @param node: The node
  71   @return: The OOB script if supported or an empty string otherwise
  72
  73   """
  74   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
  75
  76
  77 class ResultWithJobs:
  78   """Data container for LU results with jobs.
  79
  80   Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  81   by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  82   contained in the C{jobs} attribute and include the job IDs in the opcode
  83   result.
  84
  85   """
  86   def __init__(self, jobs, **kwargs):
  87     """Initializes this class.
  88
  89     Additional return values can be specified as keyword arguments.
  90
  91     @type jobs: list of lists of L{opcode.OpCode}
  92     @param jobs: A list of lists of opcode objects
  93
  94     """
  95     self.jobs = jobs
  96     self.other = kwargs
  97
  98
  99 class LogicalUnit(object):
 100   """Logical Unit base class.
 101
 102   Subclasses must follow these rules:
 103     - implement ExpandNames
 104     - implement CheckPrereq (except when tasklets are used)
 105     - implement Exec (except when tasklets are used)
 106     - implement BuildHooksEnv
 107     - implement BuildHooksNodes
 108     - redefine HPATH and HTYPE
 109     - optionally redefine their run requirements:
 110         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
 111
 112   Note that all commands require root permissions.
 113
 114   @ivar dry_run_result: the value (if any) that will be returned to the caller
 115       in dry-run mode (signalled by opcode dry_run parameter)
 116
 117   """
 118   HPATH = None
 119   HTYPE = None
 120   REQ_BGL = True
 121
 122   def __init__(self, processor, op, context, rpc):
 123     """Constructor for LogicalUnit.
 124
 125     This needs to be overridden in derived classes in order to check op
 126     validity.
 127
 128     """
 129     self.proc = processor
 130     self.op = op
 131     self.cfg = context.cfg
 132     self.context = context
 133     self.rpc = rpc
 134     # Dicts used to declare locking needs to mcpu
 135     self.needed_locks = None
 136     self.acquired_locks = {}
 137     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 138     self.add_locks = {}
 139     self.remove_locks = {}
 140     # Used to force good behavior when calling helper functions
 141     self.recalculate_locks = {}
 142     # logging
 143     self.Log = processor.Log # pylint: disable-msg=C0103
 144     self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
 145     self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
 146     self.LogStep = processor.LogStep # pylint: disable-msg=C0103
 147     # support for dry-run
 148     self.dry_run_result = None
 149     # support for generic debug attribute
 150     if (not hasattr(self.op, "debug_level") or
 151         not isinstance(self.op.debug_level, int)):
 152       self.op.debug_level = 0
 153
 154     # Tasklets
 155     self.tasklets = None
 156
 157     # Validate opcode parameters and set defaults
 158     self.op.Validate(True)
 159
 160     self.CheckArguments()
 161
 162   def CheckArguments(self):
 163     """Check syntactic validity for the opcode arguments.
 164
 165     This method is for doing a simple syntactic check and ensure
 166     validity of opcode parameters, without any cluster-related
 167     checks. While the same can be accomplished in ExpandNames and/or
 168     CheckPrereq, doing these separate is better because:
 169
 170       - ExpandNames is left as as purely a lock-related function
 171       - CheckPrereq is run after we have acquired locks (and possible
 172         waited for them)
 173
 174     The function is allowed to change the self.op attribute so that
 175     later methods can no longer worry about missing parameters.
 176
 177     """
 178     pass
 179
 180   def ExpandNames(self):
 181     """Expand names for this LU.
 182
 183     This method is called before starting to execute the opcode, and it should
 184     update all the parameters of the opcode to their canonical form (e.g. a
 185     short node name must be fully expanded after this method has successfully
 186     completed). This way locking, hooks, logging, etc. can work correctly.
 187
 188     LUs which implement this method must also populate the self.needed_locks
 189     member, as a dict with lock levels as keys, and a list of needed lock names
 190     as values. Rules:
 191
 192       - use an empty dict if you don't need any lock
 193       - if you don't need any lock at a particular level omit that level
 194       - don't put anything for the BGL level
 195       - if you want all locks at a level use locking.ALL_SET as a value
 196
 197     If you need to share locks (rather than acquire them exclusively) at one
 198     level you can modify self.share_locks, setting a true value (usually 1) for
 199     that level. By default locks are not shared.
 200
 201     This function can also define a list of tasklets, which then will be
 202     executed in order instead of the usual LU-level CheckPrereq and Exec
 203     functions, if those are not defined by the LU.
 204
 205     Examples::
 206
 207       # Acquire all nodes and one instance
 208       self.needed_locks = {
 209         locking.LEVEL_NODE: locking.ALL_SET,
 210         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 211       }
 212       # Acquire just two nodes
 213       self.needed_locks = {
 214         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 215       }
 216       # Acquire no locks
 217       self.needed_locks = {} # No, you can't leave it to the default value None
 218
 219     """
 220     # The implementation of this method is mandatory only if the new LU is
 221     # concurrent, so that old LUs don't need to be changed all at the same
 222     # time.
 223     if self.REQ_BGL:
 224       self.needed_locks = {} # Exclusive LUs don't need locks.
 225     else:
 226       raise NotImplementedError
 227
 228   def DeclareLocks(self, level):
 229     """Declare LU locking needs for a level
 230
 231     While most LUs can just declare their locking needs at ExpandNames time,
 232     sometimes there's the need to calculate some locks after having acquired
 233     the ones before. This function is called just before acquiring locks at a
 234     particular level, but after acquiring the ones at lower levels, and permits
 235     such calculations. It can be used to modify self.needed_locks, and by
 236     default it does nothing.
 237
 238     This function is only called if you have something already set in
 239     self.needed_locks for the level.
 240
 241     @param level: Locking level which is going to be locked
 242     @type level: member of ganeti.locking.LEVELS
 243
 244     """
 245
 246   def CheckPrereq(self):
 247     """Check prerequisites for this LU.
 248
 249     This method should check that the prerequisites for the execution
 250     of this LU are fulfilled. It can do internode communication, but
 251     it should be idempotent - no cluster or system changes are
 252     allowed.
 253
 254     The method should raise errors.OpPrereqError in case something is
 255     not fulfilled. Its return value is ignored.
 256
 257     This method should also update all the parameters of the opcode to
 258     their canonical form if it hasn't been done by ExpandNames before.
 259
 260     """
 261     if self.tasklets is not None:
 262       for (idx, tl) in enumerate(self.tasklets):
 263         logging.debug("Checking prerequisites for tasklet %s/%s",
 264                       idx + 1, len(self.tasklets))
 265         tl.CheckPrereq()
 266     else:
 267       pass
 268
 269   def Exec(self, feedback_fn):
 270     """Execute the LU.
 271
 272     This method should implement the actual work. It should raise
 273     errors.OpExecError for failures that are somewhat dealt with in
 274     code, or expected.
 275
 276     """
 277     if self.tasklets is not None:
 278       for (idx, tl) in enumerate(self.tasklets):
 279         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 280         tl.Exec(feedback_fn)
 281     else:
 282       raise NotImplementedError
 283
 284   def BuildHooksEnv(self):
 285     """Build hooks environment for this LU.
 286
 287     @rtype: dict
 288     @return: Dictionary containing the environment that will be used for
 289       running the hooks for this LU. The keys of the dict must not be prefixed
 290       with "GANETI_"--that'll be added by the hooks runner. The hooks runner
 291       will extend the environment with additional variables. If no environment
 292       should be defined, an empty dictionary should be returned (not C{None}).
 293     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 294       will not be called.
 295
 296     """
 297     raise NotImplementedError
 298
 299   def BuildHooksNodes(self):
 300     """Build list of nodes to run LU's hooks.
 301
 302     @rtype: tuple; (list, list)
 303     @return: Tuple containing a list of node names on which the hook
 304       should run before the execution and a list of node names on which the
 305       hook should run after the execution. No nodes should be returned as an
 306       empty list (and not None).
 307     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 308       will not be called.
 309
 310     """
 311     raise NotImplementedError
 312
 313   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 314     """Notify the LU about the results of its hooks.
 315
 316     This method is called every time a hooks phase is executed, and notifies
 317     the Logical Unit about the hooks' result. The LU can then use it to alter
 318     its result based on the hooks.  By default the method does nothing and the
 319     previous result is passed back unchanged but any LU can define it if it
 320     wants to use the local cluster hook-scripts somehow.
 321
 322     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 323         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 324     @param hook_results: the results of the multi-node hooks rpc call
 325     @param feedback_fn: function used send feedback back to the caller
 326     @param lu_result: the previous Exec result this LU had, or None
 327         in the PRE phase
 328     @return: the new Exec result, based on the previous result
 329         and hook results
 330
 331     """
 332     # API must be kept, thus we ignore the unused argument and could
 333     # be a function warnings
 334     # pylint: disable-msg=W0613,R0201
 335     return lu_result
 336
 337   def _ExpandAndLockInstance(self):
 338     """Helper function to expand and lock an instance.
 339
 340     Many LUs that work on an instance take its name in self.op.instance_name
 341     and need to expand it and then declare the expanded name for locking. This
 342     function does it, and then updates self.op.instance_name to the expanded
 343     name. It also initializes needed_locks as a dict, if this hasn't been done
 344     before.
 345
 346     """
 347     if self.needed_locks is None:
 348       self.needed_locks = {}
 349     else:
 350       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 351         "_ExpandAndLockInstance called with instance-level locks set"
 352     self.op.instance_name = _ExpandInstanceName(self.cfg,
 353                                                 self.op.instance_name)
 354     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 355
 356   def _LockInstancesNodes(self, primary_only=False):
 357     """Helper function to declare instances' nodes for locking.
 358
 359     This function should be called after locking one or more instances to lock
 360     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 361     with all primary or secondary nodes for instances already locked and
 362     present in self.needed_locks[locking.LEVEL_INSTANCE].
 363
 364     It should be called from DeclareLocks, and for safety only works if
 365     self.recalculate_locks[locking.LEVEL_NODE] is set.
 366
 367     In the future it may grow parameters to just lock some instance's nodes, or
 368     to just lock primaries or secondary nodes, if needed.
 369
 370     If should be called in DeclareLocks in a way similar to::
 371
 372       if level == locking.LEVEL_NODE:
 373         self._LockInstancesNodes()
 374
 375     @type primary_only: boolean
 376     @param primary_only: only lock primary nodes of locked instances
 377
 378     """
 379     assert locking.LEVEL_NODE in self.recalculate_locks, \
 380       "_LockInstancesNodes helper function called with no nodes to recalculate"
 381
 382     # TODO: check if we're really been called with the instance locks held
 383
 384     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 385     # future we might want to have different behaviors depending on the value
 386     # of self.recalculate_locks[locking.LEVEL_NODE]
 387     wanted_nodes = []
 388     for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
 389       instance = self.context.cfg.GetInstanceInfo(instance_name)
 390       wanted_nodes.append(instance.primary_node)
 391       if not primary_only:
 392         wanted_nodes.extend(instance.secondary_nodes)
 393
 394     if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
 395       self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
 396     elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
 397       self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
 398
 399     del self.recalculate_locks[locking.LEVEL_NODE]
 400
 401
 402 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
 403   """Simple LU which runs no hooks.
 404
 405   This LU is intended as a parent for other LogicalUnits which will
 406   run no hooks, in order to reduce duplicate code.
 407
 408   """
 409   HPATH = None
 410   HTYPE = None
 411
 412   def BuildHooksEnv(self):
 413     """Empty BuildHooksEnv for NoHooksLu.
 414
 415     This just raises an error.
 416
 417     """
 418     raise AssertionError("BuildHooksEnv called for NoHooksLUs")
 419
 420   def BuildHooksNodes(self):
 421     """Empty BuildHooksNodes for NoHooksLU.
 422
 423     """
 424     raise AssertionError("BuildHooksNodes called for NoHooksLU")
 425
 426
 427 class Tasklet:
 428   """Tasklet base class.
 429
 430   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 431   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 432   tasklets know nothing about locks.
 433
 434   Subclasses must follow these rules:
 435     - Implement CheckPrereq
 436     - Implement Exec
 437
 438   """
 439   def __init__(self, lu):
 440     self.lu = lu
 441
 442     # Shortcuts
 443     self.cfg = lu.cfg
 444     self.rpc = lu.rpc
 445
 446   def CheckPrereq(self):
 447     """Check prerequisites for this tasklets.
 448
 449     This method should check whether the prerequisites for the execution of
 450     this tasklet are fulfilled. It can do internode communication, but it
 451     should be idempotent - no cluster or system changes are allowed.
 452
 453     The method should raise errors.OpPrereqError in case something is not
 454     fulfilled. Its return value is ignored.
 455
 456     This method should also update all parameters to their canonical form if it
 457     hasn't been done before.
 458
 459     """
 460     pass
 461
 462   def Exec(self, feedback_fn):
 463     """Execute the tasklet.
 464
 465     This method should implement the actual work. It should raise
 466     errors.OpExecError for failures that are somewhat dealt with in code, or
 467     expected.
 468
 469     """
 470     raise NotImplementedError
 471
 472
 473 class _QueryBase:
 474   """Base for query utility classes.
 475
 476   """
 477   #: Attribute holding field definitions
 478   FIELDS = None
 479
 480   def __init__(self, filter_, fields, use_locking):
 481     """Initializes this class.
 482
 483     """
 484     self.use_locking = use_locking
 485
 486     self.query = query.Query(self.FIELDS, fields, filter_=filter_,
 487                              namefield="name")
 488     self.requested_data = self.query.RequestedData()
 489     self.names = self.query.RequestedNames()
 490
 491     # Sort only if no names were requested
 492     self.sort_by_name = not self.names
 493
 494     self.do_locking = None
 495     self.wanted = None
 496
 497   def _GetNames(self, lu, all_names, lock_level):
 498     """Helper function to determine names asked for in the query.
 499
 500     """
 501     if self.do_locking:
 502       names = lu.acquired_locks[lock_level]
 503     else:
 504       names = all_names
 505
 506     if self.wanted == locking.ALL_SET:
 507       assert not self.names
 508       # caller didn't specify names, so ordering is not important
 509       return utils.NiceSort(names)
 510
 511     # caller specified names and we must keep the same order
 512     assert self.names
 513     assert not self.do_locking or lu.acquired_locks[lock_level]
 514
 515     missing = set(self.wanted).difference(names)
 516     if missing:
 517       raise errors.OpExecError("Some items were removed before retrieving"
 518                                " their data: %s" % missing)
 519
 520     # Return expanded names
 521     return self.wanted
 522
 523   def ExpandNames(self, lu):
 524     """Expand names for this query.
 525
 526     See L{LogicalUnit.ExpandNames}.
 527
 528     """
 529     raise NotImplementedError()
 530
 531   def DeclareLocks(self, lu, level):
 532     """Declare locks for this query.
 533
 534     See L{LogicalUnit.DeclareLocks}.
 535
 536     """
 537     raise NotImplementedError()
 538
 539   def _GetQueryData(self, lu):
 540     """Collects all data for this query.
 541
 542     @return: Query data object
 543
 544     """
 545     raise NotImplementedError()
 546
 547   def NewStyleQuery(self, lu):
 548     """Collect data and execute query.
 549
 550     """
 551     return query.GetQueryResponse(self.query, self._GetQueryData(lu),
 552                                   sort_by_name=self.sort_by_name)
 553
 554   def OldStyleQuery(self, lu):
 555     """Collect data and execute query.
 556
 557     """
 558     return self.query.OldStyleQuery(self._GetQueryData(lu),
 559                                     sort_by_name=self.sort_by_name)
 560
 561
 562 def _GetWantedNodes(lu, nodes):
 563   """Returns list of checked and expanded node names.
 564
 565   @type lu: L{LogicalUnit}
 566   @param lu: the logical unit on whose behalf we execute
 567   @type nodes: list
 568   @param nodes: list of node names or None for all nodes
 569   @rtype: list
 570   @return: the list of nodes, sorted
 571   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 572
 573   """
 574   if nodes:
 575     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 576
 577   return utils.NiceSort(lu.cfg.GetNodeList())
 578
 579
 580 def _GetWantedInstances(lu, instances):
 581   """Returns list of checked and expanded instance names.
 582
 583   @type lu: L{LogicalUnit}
 584   @param lu: the logical unit on whose behalf we execute
 585   @type instances: list
 586   @param instances: list of instance names or None for all instances
 587   @rtype: list
 588   @return: the list of instances, sorted
 589   @raise errors.OpPrereqError: if the instances parameter is wrong type
 590   @raise errors.OpPrereqError: if any of the passed instances is not found
 591
 592   """
 593   if instances:
 594     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 595   else:
 596     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 597   return wanted
 598
 599
 600 def _GetUpdatedParams(old_params, update_dict,
 601                       use_default=True, use_none=False):
 602   """Return the new version of a parameter dictionary.
 603
 604   @type old_params: dict
 605   @param old_params: old parameters
 606   @type update_dict: dict
 607   @param update_dict: dict containing new parameter values, or
 608       constants.VALUE_DEFAULT to reset the parameter to its default
 609       value
 610   @param use_default: boolean
 611   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 612       values as 'to be deleted' values
 613   @param use_none: boolean
 614   @type use_none: whether to recognise C{None} values as 'to be
 615       deleted' values
 616   @rtype: dict
 617   @return: the new parameter dictionary
 618
 619   """
 620   params_copy = copy.deepcopy(old_params)
 621   for key, val in update_dict.iteritems():
 622     if ((use_default and val == constants.VALUE_DEFAULT) or
 623         (use_none and val is None)):
 624       try:
 625         del params_copy[key]
 626       except KeyError:
 627         pass
 628     else:
 629       params_copy[key] = val
 630   return params_copy
 631
 632
 633 def _ReleaseLocks(lu, level, names=None, keep=None):
 634   """Releases locks owned by an LU.
 635
 636   @type lu: L{LogicalUnit}
 637   @param level: Lock level
 638   @type names: list or None
 639   @param names: Names of locks to release
 640   @type keep: list or None
 641   @param keep: Names of locks to retain
 642
 643   """
 644   assert not (keep is not None and names is not None), \
 645          "Only one of the 'names' and the 'keep' parameters can be given"
 646
 647   if names is not None:
 648     should_release = names.__contains__
 649   elif keep:
 650     should_release = lambda name: name not in keep
 651   else:
 652     should_release = None
 653
 654   if should_release:
 655     retain = []
 656     release = []
 657
 658     # Determine which locks to release
 659     for name in lu.acquired_locks[level]:
 660       if should_release(name):
 661         release.append(name)
 662       else:
 663         retain.append(name)
 664
 665     assert len(lu.acquired_locks[level]) == (len(retain) + len(release))
 666
 667     # Release just some locks
 668     lu.context.glm.release(level, names=release)
 669     lu.acquired_locks[level] = retain
 670
 671     assert frozenset(lu.context.glm.list_owned(level)) == frozenset(retain)
 672   else:
 673     # Release everything
 674     lu.context.glm.release(level)
 675     del lu.acquired_locks[level]
 676
 677     assert not lu.context.glm.list_owned(level), "No locks should be owned"
 678
 679
 680 def _RunPostHook(lu, node_name):
 681   """Runs the post-hook for an opcode on a single node.
 682
 683   """
 684   hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
 685   try:
 686     hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
 687   except:
 688     # pylint: disable-msg=W0702
 689     lu.LogWarning("Errors occurred running hooks on %s" % node_name)
 690
 691
 692 def _CheckOutputFields(static, dynamic, selected):
 693   """Checks whether all selected fields are valid.
 694
 695   @type static: L{utils.FieldSet}
 696   @param static: static fields set
 697   @type dynamic: L{utils.FieldSet}
 698   @param dynamic: dynamic fields set
 699
 700   """
 701   f = utils.FieldSet()
 702   f.Extend(static)
 703   f.Extend(dynamic)
 704
 705   delta = f.NonMatching(selected)
 706   if delta:
 707     raise errors.OpPrereqError("Unknown output fields selected: %s"
 708                                % ",".join(delta), errors.ECODE_INVAL)
 709
 710
 711 def _CheckGlobalHvParams(params):
 712   """Validates that given hypervisor params are not global ones.
 713
 714   This will ensure that instances don't get customised versions of
 715   global params.
 716
 717   """
 718   used_globals = constants.HVC_GLOBALS.intersection(params)
 719   if used_globals:
 720     msg = ("The following hypervisor parameters are global and cannot"
 721            " be customized at instance level, please modify them at"
 722            " cluster level: %s" % utils.CommaJoin(used_globals))
 723     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 724
 725
 726 def _CheckNodeOnline(lu, node, msg=None):
 727   """Ensure that a given node is online.
 728
 729   @param lu: the LU on behalf of which we make the check
 730   @param node: the node to check
 731   @param msg: if passed, should be a message to replace the default one
 732   @raise errors.OpPrereqError: if the node is offline
 733
 734   """
 735   if msg is None:
 736     msg = "Can't use offline node"
 737   if lu.cfg.GetNodeInfo(node).offline:
 738     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
 739
 740
 741 def _CheckNodeNotDrained(lu, node):
 742   """Ensure that a given node is not drained.
 743
 744   @param lu: the LU on behalf of which we make the check
 745   @param node: the node to check
 746   @raise errors.OpPrereqError: if the node is drained
 747
 748   """
 749   if lu.cfg.GetNodeInfo(node).drained:
 750     raise errors.OpPrereqError("Can't use drained node %s" % node,
 751                                errors.ECODE_STATE)
 752
 753
 754 def _CheckNodeVmCapable(lu, node):
 755   """Ensure that a given node is vm capable.
 756
 757   @param lu: the LU on behalf of which we make the check
 758   @param node: the node to check
 759   @raise errors.OpPrereqError: if the node is not vm capable
 760
 761   """
 762   if not lu.cfg.GetNodeInfo(node).vm_capable:
 763     raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
 764                                errors.ECODE_STATE)
 765
 766
 767 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 768   """Ensure that a node supports a given OS.
 769
 770   @param lu: the LU on behalf of which we make the check
 771   @param node: the node to check
 772   @param os_name: the OS to query about
 773   @param force_variant: whether to ignore variant errors
 774   @raise errors.OpPrereqError: if the node is not supporting the OS
 775
 776   """
 777   result = lu.rpc.call_os_get(node, os_name)
 778   result.Raise("OS '%s' not in supported OS list for node %s" %
 779                (os_name, node),
 780                prereq=True, ecode=errors.ECODE_INVAL)
 781   if not force_variant:
 782     _CheckOSVariant(result.payload, os_name)
 783
 784
 785 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
 786   """Ensure that a node has the given secondary ip.
 787
 788   @type lu: L{LogicalUnit}
 789   @param lu: the LU on behalf of which we make the check
 790   @type node: string
 791   @param node: the node to check
 792   @type secondary_ip: string
 793   @param secondary_ip: the ip to check
 794   @type prereq: boolean
 795   @param prereq: whether to throw a prerequisite or an execute error
 796   @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
 797   @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
 798
 799   """
 800   result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
 801   result.Raise("Failure checking secondary ip on node %s" % node,
 802                prereq=prereq, ecode=errors.ECODE_ENVIRON)
 803   if not result.payload:
 804     msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
 805            " please fix and re-run this command" % secondary_ip)
 806     if prereq:
 807       raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
 808     else:
 809       raise errors.OpExecError(msg)
 810
 811
 812 def _GetClusterDomainSecret():
 813   """Reads the cluster domain secret.
 814
 815   """
 816   return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
 817                                strict=True)
 818
 819
 820 def _CheckInstanceDown(lu, instance, reason):
 821   """Ensure that an instance is not running."""
 822   if instance.admin_up:
 823     raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
 824                                (instance.name, reason), errors.ECODE_STATE)
 825
 826   pnode = instance.primary_node
 827   ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
 828   ins_l.Raise("Can't contact node %s for instance information" % pnode,
 829               prereq=True, ecode=errors.ECODE_ENVIRON)
 830
 831   if instance.name in ins_l.payload:
 832     raise errors.OpPrereqError("Instance %s is running, %s" %
 833                                (instance.name, reason), errors.ECODE_STATE)
 834
 835
 836 def _ExpandItemName(fn, name, kind):
 837   """Expand an item name.
 838
 839   @param fn: the function to use for expansion
 840   @param name: requested item name
 841   @param kind: text description ('Node' or 'Instance')
 842   @return: the resolved (full) name
 843   @raise errors.OpPrereqError: if the item is not found
 844
 845   """
 846   full_name = fn(name)
 847   if full_name is None:
 848     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
 849                                errors.ECODE_NOENT)
 850   return full_name
 851
 852
 853 def _ExpandNodeName(cfg, name):
 854   """Wrapper over L{_ExpandItemName} for nodes."""
 855   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
 856
 857
 858 def _ExpandInstanceName(cfg, name):
 859   """Wrapper over L{_ExpandItemName} for instance."""
 860   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
 861
 862
 863 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 864                           memory, vcpus, nics, disk_template, disks,
 865                           bep, hvp, hypervisor_name):
 866   """Builds instance related env variables for hooks
 867
 868   This builds the hook environment from individual variables.
 869
 870   @type name: string
 871   @param name: the name of the instance
 872   @type primary_node: string
 873   @param primary_node: the name of the instance's primary node
 874   @type secondary_nodes: list
 875   @param secondary_nodes: list of secondary nodes as strings
 876   @type os_type: string
 877   @param os_type: the name of the instance's OS
 878   @type status: boolean
 879   @param status: the should_run status of the instance
 880   @type memory: string
 881   @param memory: the memory size of the instance
 882   @type vcpus: string
 883   @param vcpus: the count of VCPUs the instance has
 884   @type nics: list
 885   @param nics: list of tuples (ip, mac, mode, link) representing
 886       the NICs the instance has
 887   @type disk_template: string
 888   @param disk_template: the disk template of the instance
 889   @type disks: list
 890   @param disks: the list of (size, mode) pairs
 891   @type bep: dict
 892   @param bep: the backend parameters for the instance
 893   @type hvp: dict
 894   @param hvp: the hypervisor parameters for the instance
 895   @type hypervisor_name: string
 896   @param hypervisor_name: the hypervisor for the instance
 897   @rtype: dict
 898   @return: the hook environment for this instance
 899
 900   """
 901   if status:
 902     str_status = "up"
 903   else:
 904     str_status = "down"
 905   env = {
 906     "OP_TARGET": name,
 907     "INSTANCE_NAME": name,
 908     "INSTANCE_PRIMARY": primary_node,
 909     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
 910     "INSTANCE_OS_TYPE": os_type,
 911     "INSTANCE_STATUS": str_status,
 912     "INSTANCE_MEMORY": memory,
 913     "INSTANCE_VCPUS": vcpus,
 914     "INSTANCE_DISK_TEMPLATE": disk_template,
 915     "INSTANCE_HYPERVISOR": hypervisor_name,
 916   }
 917
 918   if nics:
 919     nic_count = len(nics)
 920     for idx, (ip, mac, mode, link) in enumerate(nics):
 921       if ip is None:
 922         ip = ""
 923       env["INSTANCE_NIC%d_IP" % idx] = ip
 924       env["INSTANCE_NIC%d_MAC" % idx] = mac
 925       env["INSTANCE_NIC%d_MODE" % idx] = mode
 926       env["INSTANCE_NIC%d_LINK" % idx] = link
 927       if mode == constants.NIC_MODE_BRIDGED:
 928         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
 929   else:
 930     nic_count = 0
 931
 932   env["INSTANCE_NIC_COUNT"] = nic_count
 933
 934   if disks:
 935     disk_count = len(disks)
 936     for idx, (size, mode) in enumerate(disks):
 937       env["INSTANCE_DISK%d_SIZE" % idx] = size
 938       env["INSTANCE_DISK%d_MODE" % idx] = mode
 939   else:
 940     disk_count = 0
 941
 942   env["INSTANCE_DISK_COUNT"] = disk_count
 943
 944   for source, kind in [(bep, "BE"), (hvp, "HV")]:
 945     for key, value in source.items():
 946       env["INSTANCE_%s_%s" % (kind, key)] = value
 947
 948   return env
 949
 950
 951 def _NICListToTuple(lu, nics):
 952   """Build a list of nic information tuples.
 953
 954   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
 955   value in LUInstanceQueryData.
 956
 957   @type lu:  L{LogicalUnit}
 958   @param lu: the logical unit on whose behalf we execute
 959   @type nics: list of L{objects.NIC}
 960   @param nics: list of nics to convert to hooks tuples
 961
 962   """
 963   hooks_nics = []
 964   cluster = lu.cfg.GetClusterInfo()
 965   for nic in nics:
 966     ip = nic.ip
 967     mac = nic.mac
 968     filled_params = cluster.SimpleFillNIC(nic.nicparams)
 969     mode = filled_params[constants.NIC_MODE]
 970     link = filled_params[constants.NIC_LINK]
 971     hooks_nics.append((ip, mac, mode, link))
 972   return hooks_nics
 973
 974
 975 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
 976   """Builds instance related env variables for hooks from an object.
 977
 978   @type lu: L{LogicalUnit}
 979   @param lu: the logical unit on whose behalf we execute
 980   @type instance: L{objects.Instance}
 981   @param instance: the instance for which we should build the
 982       environment
 983   @type override: dict
 984   @param override: dictionary with key/values that will override
 985       our values
 986   @rtype: dict
 987   @return: the hook environment dictionary
 988
 989   """
 990   cluster = lu.cfg.GetClusterInfo()
 991   bep = cluster.FillBE(instance)
 992   hvp = cluster.FillHV(instance)
 993   args = {
 994     'name': instance.name,
 995     'primary_node': instance.primary_node,
 996     'secondary_nodes': instance.secondary_nodes,
 997     'os_type': instance.os,
 998     'status': instance.admin_up,
 999     'memory': bep[constants.BE_MEMORY],
1000     'vcpus': bep[constants.BE_VCPUS],
1001     'nics': _NICListToTuple(lu, instance.nics),
1002     'disk_template': instance.disk_template,
1003     'disks': [(disk.size, disk.mode) for disk in instance.disks],
1004     'bep': bep,
1005     'hvp': hvp,
1006     'hypervisor_name': instance.hypervisor,
1007   }
1008   if override:
1009     args.update(override)
1010   return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1011
1012
1013 def _AdjustCandidatePool(lu, exceptions):
1014   """Adjust the candidate pool after node operations.
1015
1016   """
1017   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1018   if mod_list:
1019     lu.LogInfo("Promoted nodes to master candidate role: %s",
1020                utils.CommaJoin(node.name for node in mod_list))
1021     for name in mod_list:
1022       lu.context.ReaddNode(name)
1023   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1024   if mc_now > mc_max:
1025     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1026                (mc_now, mc_max))
1027
1028
1029 def _DecideSelfPromotion(lu, exceptions=None):
1030   """Decide whether I should promote myself as a master candidate.
1031
1032   """
1033   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1034   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1035   # the new node will increase mc_max with one, so:
1036   mc_should = min(mc_should + 1, cp_size)
1037   return mc_now < mc_should
1038
1039
1040 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1041   """Check that the brigdes needed by a list of nics exist.
1042
1043   """
1044   cluster = lu.cfg.GetClusterInfo()
1045   paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1046   brlist = [params[constants.NIC_LINK] for params in paramslist
1047             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1048   if brlist:
1049     result = lu.rpc.call_bridges_exist(target_node, brlist)
1050     result.Raise("Error checking bridges on destination node '%s'" %
1051                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1052
1053
1054 def _CheckInstanceBridgesExist(lu, instance, node=None):
1055   """Check that the brigdes needed by an instance exist.
1056
1057   """
1058   if node is None:
1059     node = instance.primary_node
1060   _CheckNicsBridgesExist(lu, instance.nics, node)
1061
1062
1063 def _CheckOSVariant(os_obj, name):
1064   """Check whether an OS name conforms to the os variants specification.
1065
1066   @type os_obj: L{objects.OS}
1067   @param os_obj: OS object to check
1068   @type name: string
1069   @param name: OS name passed by the user, to check for validity
1070
1071   """
1072   if not os_obj.supported_variants:
1073     return
1074   variant = objects.OS.GetVariant(name)
1075   if not variant:
1076     raise errors.OpPrereqError("OS name must include a variant",
1077                                errors.ECODE_INVAL)
1078
1079   if variant not in os_obj.supported_variants:
1080     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1081
1082
1083 def _GetNodeInstancesInner(cfg, fn):
1084   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1085
1086
1087 def _GetNodeInstances(cfg, node_name):
1088   """Returns a list of all primary and secondary instances on a node.
1089
1090   """
1091
1092   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1093
1094
1095 def _GetNodePrimaryInstances(cfg, node_name):
1096   """Returns primary instances on a node.
1097
1098   """
1099   return _GetNodeInstancesInner(cfg,
1100                                 lambda inst: node_name == inst.primary_node)
1101
1102
1103 def _GetNodeSecondaryInstances(cfg, node_name):
1104   """Returns secondary instances on a node.
1105
1106   """
1107   return _GetNodeInstancesInner(cfg,
1108                                 lambda inst: node_name in inst.secondary_nodes)
1109
1110
1111 def _GetStorageTypeArgs(cfg, storage_type):
1112   """Returns the arguments for a storage type.
1113
1114   """
1115   # Special case for file storage
1116   if storage_type == constants.ST_FILE:
1117     # storage.FileStorage wants a list of storage directories
1118     return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1119
1120   return []
1121
1122
1123 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1124   faulty = []
1125
1126   for dev in instance.disks:
1127     cfg.SetDiskID(dev, node_name)
1128
1129   result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1130   result.Raise("Failed to get disk status from node %s" % node_name,
1131                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1132
1133   for idx, bdev_status in enumerate(result.payload):
1134     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1135       faulty.append(idx)
1136
1137   return faulty
1138
1139
1140 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1141   """Check the sanity of iallocator and node arguments and use the
1142   cluster-wide iallocator if appropriate.
1143
1144   Check that at most one of (iallocator, node) is specified. If none is
1145   specified, then the LU's opcode's iallocator slot is filled with the
1146   cluster-wide default iallocator.
1147
1148   @type iallocator_slot: string
1149   @param iallocator_slot: the name of the opcode iallocator slot
1150   @type node_slot: string
1151   @param node_slot: the name of the opcode target node slot
1152
1153   """
1154   node = getattr(lu.op, node_slot, None)
1155   iallocator = getattr(lu.op, iallocator_slot, None)
1156
1157   if node is not None and iallocator is not None:
1158     raise errors.OpPrereqError("Do not specify both, iallocator and node.",
1159                                errors.ECODE_INVAL)
1160   elif node is None and iallocator is None:
1161     default_iallocator = lu.cfg.GetDefaultIAllocator()
1162     if default_iallocator:
1163       setattr(lu.op, iallocator_slot, default_iallocator)
1164     else:
1165       raise errors.OpPrereqError("No iallocator or node given and no"
1166                                  " cluster-wide default iallocator found."
1167                                  " Please specify either an iallocator or a"
1168                                  " node, or set a cluster-wide default"
1169                                  " iallocator.")
1170
1171
1172 class LUClusterPostInit(LogicalUnit):
1173   """Logical unit for running hooks after cluster initialization.
1174
1175   """
1176   HPATH = "cluster-init"
1177   HTYPE = constants.HTYPE_CLUSTER
1178
1179   def BuildHooksEnv(self):
1180     """Build hooks env.
1181
1182     """
1183     return {
1184       "OP_TARGET": self.cfg.GetClusterName(),
1185       }
1186
1187   def BuildHooksNodes(self):
1188     """Build hooks nodes.
1189
1190     """
1191     return ([], [self.cfg.GetMasterNode()])
1192
1193   def Exec(self, feedback_fn):
1194     """Nothing to do.
1195
1196     """
1197     return True
1198
1199
1200 class LUClusterDestroy(LogicalUnit):
1201   """Logical unit for destroying the cluster.
1202
1203   """
1204   HPATH = "cluster-destroy"
1205   HTYPE = constants.HTYPE_CLUSTER
1206
1207   def BuildHooksEnv(self):
1208     """Build hooks env.
1209
1210     """
1211     return {
1212       "OP_TARGET": self.cfg.GetClusterName(),
1213       }
1214
1215   def BuildHooksNodes(self):
1216     """Build hooks nodes.
1217
1218     """
1219     return ([], [])
1220
1221   def CheckPrereq(self):
1222     """Check prerequisites.
1223
1224     This checks whether the cluster is empty.
1225
1226     Any errors are signaled by raising errors.OpPrereqError.
1227
1228     """
1229     master = self.cfg.GetMasterNode()
1230
1231     nodelist = self.cfg.GetNodeList()
1232     if len(nodelist) != 1 or nodelist[0] != master:
1233       raise errors.OpPrereqError("There are still %d node(s) in"
1234                                  " this cluster." % (len(nodelist) - 1),
1235                                  errors.ECODE_INVAL)
1236     instancelist = self.cfg.GetInstanceList()
1237     if instancelist:
1238       raise errors.OpPrereqError("There are still %d instance(s) in"
1239                                  " this cluster." % len(instancelist),
1240                                  errors.ECODE_INVAL)
1241
1242   def Exec(self, feedback_fn):
1243     """Destroys the cluster.
1244
1245     """
1246     master = self.cfg.GetMasterNode()
1247
1248     # Run post hooks on master node before it's removed
1249     _RunPostHook(self, master)
1250
1251     result = self.rpc.call_node_stop_master(master, False)
1252     result.Raise("Could not disable the master role")
1253
1254     return master
1255
1256
1257 def _VerifyCertificate(filename):
1258   """Verifies a certificate for LUClusterVerify.
1259
1260   @type filename: string
1261   @param filename: Path to PEM file
1262
1263   """
1264   try:
1265     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1266                                            utils.ReadFile(filename))
1267   except Exception, err: # pylint: disable-msg=W0703
1268     return (LUClusterVerify.ETYPE_ERROR,
1269             "Failed to load X509 certificate %s: %s" % (filename, err))
1270
1271   (errcode, msg) = \
1272     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1273                                 constants.SSL_CERT_EXPIRATION_ERROR)
1274
1275   if msg:
1276     fnamemsg = "While verifying %s: %s" % (filename, msg)
1277   else:
1278     fnamemsg = None
1279
1280   if errcode is None:
1281     return (None, fnamemsg)
1282   elif errcode == utils.CERT_WARNING:
1283     return (LUClusterVerify.ETYPE_WARNING, fnamemsg)
1284   elif errcode == utils.CERT_ERROR:
1285     return (LUClusterVerify.ETYPE_ERROR, fnamemsg)
1286
1287   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1288
1289
1290 class LUClusterVerify(LogicalUnit):
1291   """Verifies the cluster status.
1292
1293   """
1294   HPATH = "cluster-verify"
1295   HTYPE = constants.HTYPE_CLUSTER
1296   REQ_BGL = False
1297
1298   TCLUSTER = "cluster"
1299   TNODE = "node"
1300   TINSTANCE = "instance"
1301
1302   ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1303   ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1304   ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1305   EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1306   EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1307   EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1308   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1309   EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1310   EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1311   EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1312   ENODEDRBD = (TNODE, "ENODEDRBD")
1313   ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1314   ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1315   ENODEHOOKS = (TNODE, "ENODEHOOKS")
1316   ENODEHV = (TNODE, "ENODEHV")
1317   ENODELVM = (TNODE, "ENODELVM")
1318   ENODEN1 = (TNODE, "ENODEN1")
1319   ENODENET = (TNODE, "ENODENET")
1320   ENODEOS = (TNODE, "ENODEOS")
1321   ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1322   ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1323   ENODERPC = (TNODE, "ENODERPC")
1324   ENODESSH = (TNODE, "ENODESSH")
1325   ENODEVERSION = (TNODE, "ENODEVERSION")
1326   ENODESETUP = (TNODE, "ENODESETUP")
1327   ENODETIME = (TNODE, "ENODETIME")
1328   ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1329
1330   ETYPE_FIELD = "code"
1331   ETYPE_ERROR = "ERROR"
1332   ETYPE_WARNING = "WARNING"
1333
1334   _HOOKS_INDENT_RE = re.compile("^", re.M)
1335
1336   class NodeImage(object):
1337     """A class representing the logical and physical status of a node.
1338
1339     @type name: string
1340     @ivar name: the node name to which this object refers
1341     @ivar volumes: a structure as returned from
1342         L{ganeti.backend.GetVolumeList} (runtime)
1343     @ivar instances: a list of running instances (runtime)
1344     @ivar pinst: list of configured primary instances (config)
1345     @ivar sinst: list of configured secondary instances (config)
1346     @ivar sbp: dictionary of {primary-node: list of instances} for all
1347         instances for which this node is secondary (config)
1348     @ivar mfree: free memory, as reported by hypervisor (runtime)
1349     @ivar dfree: free disk, as reported by the node (runtime)
1350     @ivar offline: the offline status (config)
1351     @type rpc_fail: boolean
1352     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1353         not whether the individual keys were correct) (runtime)
1354     @type lvm_fail: boolean
1355     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1356     @type hyp_fail: boolean
1357     @ivar hyp_fail: whether the RPC call didn't return the instance list
1358     @type ghost: boolean
1359     @ivar ghost: whether this is a known node or not (config)
1360     @type os_fail: boolean
1361     @ivar os_fail: whether the RPC call didn't return valid OS data
1362     @type oslist: list
1363     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1364     @type vm_capable: boolean
1365     @ivar vm_capable: whether the node can host instances
1366
1367     """
1368     def __init__(self, offline=False, name=None, vm_capable=True):
1369       self.name = name
1370       self.volumes = {}
1371       self.instances = []
1372       self.pinst = []
1373       self.sinst = []
1374       self.sbp = {}
1375       self.mfree = 0
1376       self.dfree = 0
1377       self.offline = offline
1378       self.vm_capable = vm_capable
1379       self.rpc_fail = False
1380       self.lvm_fail = False
1381       self.hyp_fail = False
1382       self.ghost = False
1383       self.os_fail = False
1384       self.oslist = {}
1385
1386   def ExpandNames(self):
1387     self.needed_locks = {
1388       locking.LEVEL_NODE: locking.ALL_SET,
1389       locking.LEVEL_INSTANCE: locking.ALL_SET,
1390     }
1391     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1392
1393   def _Error(self, ecode, item, msg, *args, **kwargs):
1394     """Format an error message.
1395
1396     Based on the opcode's error_codes parameter, either format a
1397     parseable error code, or a simpler error string.
1398
1399     This must be called only from Exec and functions called from Exec.
1400
1401     """
1402     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1403     itype, etxt = ecode
1404     # first complete the msg
1405     if args:
1406       msg = msg % args
1407     # then format the whole message
1408     if self.op.error_codes:
1409       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1410     else:
1411       if item:
1412         item = " " + item
1413       else:
1414         item = ""
1415       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1416     # and finally report it via the feedback_fn
1417     self._feedback_fn("  - %s" % msg)
1418
1419   def _ErrorIf(self, cond, *args, **kwargs):
1420     """Log an error message if the passed condition is True.
1421
1422     """
1423     cond = bool(cond) or self.op.debug_simulate_errors
1424     if cond:
1425       self._Error(*args, **kwargs)
1426     # do not mark the operation as failed for WARN cases only
1427     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1428       self.bad = self.bad or cond
1429
1430   def _VerifyNode(self, ninfo, nresult):
1431     """Perform some basic validation on data returned from a node.
1432
1433       - check the result data structure is well formed and has all the
1434         mandatory fields
1435       - check ganeti version
1436
1437     @type ninfo: L{objects.Node}
1438     @param ninfo: the node to check
1439     @param nresult: the results from the node
1440     @rtype: boolean
1441     @return: whether overall this call was successful (and we can expect
1442          reasonable values in the respose)
1443
1444     """
1445     node = ninfo.name
1446     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1447
1448     # main result, nresult should be a non-empty dict
1449     test = not nresult or not isinstance(nresult, dict)
1450     _ErrorIf(test, self.ENODERPC, node,
1451                   "unable to verify node: no data returned")
1452     if test:
1453       return False
1454
1455     # compares ganeti version
1456     local_version = constants.PROTOCOL_VERSION
1457     remote_version = nresult.get("version", None)
1458     test = not (remote_version and
1459                 isinstance(remote_version, (list, tuple)) and
1460                 len(remote_version) == 2)
1461     _ErrorIf(test, self.ENODERPC, node,
1462              "connection to node returned invalid data")
1463     if test:
1464       return False
1465
1466     test = local_version != remote_version[0]
1467     _ErrorIf(test, self.ENODEVERSION, node,
1468              "incompatible protocol versions: master %s,"
1469              " node %s", local_version, remote_version[0])
1470     if test:
1471       return False
1472
1473     # node seems compatible, we can actually try to look into its results
1474
1475     # full package version
1476     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1477                   self.ENODEVERSION, node,
1478                   "software version mismatch: master %s, node %s",
1479                   constants.RELEASE_VERSION, remote_version[1],
1480                   code=self.ETYPE_WARNING)
1481
1482     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1483     if ninfo.vm_capable and isinstance(hyp_result, dict):
1484       for hv_name, hv_result in hyp_result.iteritems():
1485         test = hv_result is not None
1486         _ErrorIf(test, self.ENODEHV, node,
1487                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1488
1489     hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1490     if ninfo.vm_capable and isinstance(hvp_result, list):
1491       for item, hv_name, hv_result in hvp_result:
1492         _ErrorIf(True, self.ENODEHV, node,
1493                  "hypervisor %s parameter verify failure (source %s): %s",
1494                  hv_name, item, hv_result)
1495
1496     test = nresult.get(constants.NV_NODESETUP,
1497                        ["Missing NODESETUP results"])
1498     _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1499              "; ".join(test))
1500
1501     return True
1502
1503   def _VerifyNodeTime(self, ninfo, nresult,
1504                       nvinfo_starttime, nvinfo_endtime):
1505     """Check the node time.
1506
1507     @type ninfo: L{objects.Node}
1508     @param ninfo: the node to check
1509     @param nresult: the remote results for the node
1510     @param nvinfo_starttime: the start time of the RPC call
1511     @param nvinfo_endtime: the end time of the RPC call
1512
1513     """
1514     node = ninfo.name
1515     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1516
1517     ntime = nresult.get(constants.NV_TIME, None)
1518     try:
1519       ntime_merged = utils.MergeTime(ntime)
1520     except (ValueError, TypeError):
1521       _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1522       return
1523
1524     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1525       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1526     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1527       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1528     else:
1529       ntime_diff = None
1530
1531     _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1532              "Node time diverges by at least %s from master node time",
1533              ntime_diff)
1534
1535   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1536     """Check the node time.
1537
1538     @type ninfo: L{objects.Node}
1539     @param ninfo: the node to check
1540     @param nresult: the remote results for the node
1541     @param vg_name: the configured VG name
1542
1543     """
1544     if vg_name is None:
1545       return
1546
1547     node = ninfo.name
1548     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1549
1550     # checks vg existence and size > 20G
1551     vglist = nresult.get(constants.NV_VGLIST, None)
1552     test = not vglist
1553     _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1554     if not test:
1555       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1556                                             constants.MIN_VG_SIZE)
1557       _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1558
1559     # check pv names
1560     pvlist = nresult.get(constants.NV_PVLIST, None)
1561     test = pvlist is None
1562     _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1563     if not test:
1564       # check that ':' is not present in PV names, since it's a
1565       # special character for lvcreate (denotes the range of PEs to
1566       # use on the PV)
1567       for _, pvname, owner_vg in pvlist:
1568         test = ":" in pvname
1569         _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1570                  " '%s' of VG '%s'", pvname, owner_vg)
1571
1572   def _VerifyNodeNetwork(self, ninfo, nresult):
1573     """Check the node time.
1574
1575     @type ninfo: L{objects.Node}
1576     @param ninfo: the node to check
1577     @param nresult: the remote results for the node
1578
1579     """
1580     node = ninfo.name
1581     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1582
1583     test = constants.NV_NODELIST not in nresult
1584     _ErrorIf(test, self.ENODESSH, node,
1585              "node hasn't returned node ssh connectivity data")
1586     if not test:
1587       if nresult[constants.NV_NODELIST]:
1588         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1589           _ErrorIf(True, self.ENODESSH, node,
1590                    "ssh communication with node '%s': %s", a_node, a_msg)
1591
1592     test = constants.NV_NODENETTEST not in nresult
1593     _ErrorIf(test, self.ENODENET, node,
1594              "node hasn't returned node tcp connectivity data")
1595     if not test:
1596       if nresult[constants.NV_NODENETTEST]:
1597         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1598         for anode in nlist:
1599           _ErrorIf(True, self.ENODENET, node,
1600                    "tcp communication with node '%s': %s",
1601                    anode, nresult[constants.NV_NODENETTEST][anode])
1602
1603     test = constants.NV_MASTERIP not in nresult
1604     _ErrorIf(test, self.ENODENET, node,
1605              "node hasn't returned node master IP reachability data")
1606     if not test:
1607       if not nresult[constants.NV_MASTERIP]:
1608         if node == self.master_node:
1609           msg = "the master node cannot reach the master IP (not configured?)"
1610         else:
1611           msg = "cannot reach the master IP"
1612         _ErrorIf(True, self.ENODENET, node, msg)
1613
1614   def _VerifyInstance(self, instance, instanceconfig, node_image,
1615                       diskstatus):
1616     """Verify an instance.
1617
1618     This function checks to see if the required block devices are
1619     available on the instance's node.
1620
1621     """
1622     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1623     node_current = instanceconfig.primary_node
1624
1625     node_vol_should = {}
1626     instanceconfig.MapLVsByNode(node_vol_should)
1627
1628     for node in node_vol_should:
1629       n_img = node_image[node]
1630       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1631         # ignore missing volumes on offline or broken nodes
1632         continue
1633       for volume in node_vol_should[node]:
1634         test = volume not in n_img.volumes
1635         _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1636                  "volume %s missing on node %s", volume, node)
1637
1638     if instanceconfig.admin_up:
1639       pri_img = node_image[node_current]
1640       test = instance not in pri_img.instances and not pri_img.offline
1641       _ErrorIf(test, self.EINSTANCEDOWN, instance,
1642                "instance not running on its primary node %s",
1643                node_current)
1644
1645     for node, n_img in node_image.items():
1646       if node != node_current:
1647         test = instance in n_img.instances
1648         _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1649                  "instance should not run on node %s", node)
1650
1651     diskdata = [(nname, success, status, idx)
1652                 for (nname, disks) in diskstatus.items()
1653                 for idx, (success, status) in enumerate(disks)]
1654
1655     for nname, success, bdev_status, idx in diskdata:
1656       # the 'ghost node' construction in Exec() ensures that we have a
1657       # node here
1658       snode = node_image[nname]
1659       bad_snode = snode.ghost or snode.offline
1660       _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
1661                self.EINSTANCEFAULTYDISK, instance,
1662                "couldn't retrieve status for disk/%s on %s: %s",
1663                idx, nname, bdev_status)
1664       _ErrorIf((instanceconfig.admin_up and success and
1665                 bdev_status.ldisk_status == constants.LDS_FAULTY),
1666                self.EINSTANCEFAULTYDISK, instance,
1667                "disk/%s on %s is faulty", idx, nname)
1668
1669   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1670     """Verify if there are any unknown volumes in the cluster.
1671
1672     The .os, .swap and backup volumes are ignored. All other volumes are
1673     reported as unknown.
1674
1675     @type reserved: L{ganeti.utils.FieldSet}
1676     @param reserved: a FieldSet of reserved volume names
1677
1678     """
1679     for node, n_img in node_image.items():
1680       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1681         # skip non-healthy nodes
1682         continue
1683       for volume in n_img.volumes:
1684         test = ((node not in node_vol_should or
1685                 volume not in node_vol_should[node]) and
1686                 not reserved.Matches(volume))
1687         self._ErrorIf(test, self.ENODEORPHANLV, node,
1688                       "volume %s is unknown", volume)
1689
1690   def _VerifyOrphanInstances(self, instancelist, node_image):
1691     """Verify the list of running instances.
1692
1693     This checks what instances are running but unknown to the cluster.
1694
1695     """
1696     for node, n_img in node_image.items():
1697       for o_inst in n_img.instances:
1698         test = o_inst not in instancelist
1699         self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1700                       "instance %s on node %s should not exist", o_inst, node)
1701
1702   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1703     """Verify N+1 Memory Resilience.
1704
1705     Check that if one single node dies we can still start all the
1706     instances it was primary for.
1707
1708     """
1709     cluster_info = self.cfg.GetClusterInfo()
1710     for node, n_img in node_image.items():
1711       # This code checks that every node which is now listed as
1712       # secondary has enough memory to host all instances it is
1713       # supposed to should a single other node in the cluster fail.
1714       # FIXME: not ready for failover to an arbitrary node
1715       # FIXME: does not support file-backed instances
1716       # WARNING: we currently take into account down instances as well
1717       # as up ones, considering that even if they're down someone
1718       # might want to start them even in the event of a node failure.
1719       if n_img.offline:
1720         # we're skipping offline nodes from the N+1 warning, since
1721         # most likely we don't have good memory infromation from them;
1722         # we already list instances living on such nodes, and that's
1723         # enough warning
1724         continue
1725       for prinode, instances in n_img.sbp.items():
1726         needed_mem = 0
1727         for instance in instances:
1728           bep = cluster_info.FillBE(instance_cfg[instance])
1729           if bep[constants.BE_AUTO_BALANCE]:
1730             needed_mem += bep[constants.BE_MEMORY]
1731         test = n_img.mfree < needed_mem
1732         self._ErrorIf(test, self.ENODEN1, node,
1733                       "not enough memory to accomodate instance failovers"
1734                       " should node %s fail (%dMiB needed, %dMiB available)",
1735                       prinode, needed_mem, n_img.mfree)
1736
1737   @classmethod
1738   def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
1739                    (files_all, files_all_opt, files_mc, files_vm)):
1740     """Verifies file checksums collected from all nodes.
1741
1742     @param errorif: Callback for reporting errors
1743     @param nodeinfo: List of L{objects.Node} objects
1744     @param master_node: Name of master node
1745     @param all_nvinfo: RPC results
1746
1747     """
1748     node_names = frozenset(node.name for node in nodeinfo)
1749
1750     assert master_node in node_names
1751     assert (len(files_all | files_all_opt | files_mc | files_vm) ==
1752             sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
1753            "Found file listed in more than one file list"
1754
1755     # Define functions determining which nodes to consider for a file
1756     file2nodefn = dict([(filename, fn)
1757       for (files, fn) in [(files_all, None),
1758                           (files_all_opt, None),
1759                           (files_mc, lambda node: (node.master_candidate or
1760                                                    node.name == master_node)),
1761                           (files_vm, lambda node: node.vm_capable)]
1762       for filename in files])
1763
1764     fileinfo = dict((filename, {}) for filename in file2nodefn.keys())
1765
1766     for node in nodeinfo:
1767       nresult = all_nvinfo[node.name]
1768
1769       if nresult.fail_msg or not nresult.payload:
1770         node_files = None
1771       else:
1772         node_files = nresult.payload.get(constants.NV_FILELIST, None)
1773
1774       test = not (node_files and isinstance(node_files, dict))
1775       errorif(test, cls.ENODEFILECHECK, node.name,
1776               "Node did not return file checksum data")
1777       if test:
1778         continue
1779
1780       for (filename, checksum) in node_files.items():
1781         # Check if the file should be considered for a node
1782         fn = file2nodefn[filename]
1783         if fn is None or fn(node):
1784           fileinfo[filename].setdefault(checksum, set()).add(node.name)
1785
1786     for (filename, checksums) in fileinfo.items():
1787       assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
1788
1789       # Nodes having the file
1790       with_file = frozenset(node_name
1791                             for nodes in fileinfo[filename].values()
1792                             for node_name in nodes)
1793
1794       # Nodes missing file
1795       missing_file = node_names - with_file
1796
1797       if filename in files_all_opt:
1798         # All or no nodes
1799         errorif(missing_file and missing_file != node_names,
1800                 cls.ECLUSTERFILECHECK, None,
1801                 "File %s is optional, but it must exist on all or no nodes (not"
1802                 " found on %s)",
1803                 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
1804       else:
1805         errorif(missing_file, cls.ECLUSTERFILECHECK, None,
1806                 "File %s is missing from node(s) %s", filename,
1807                 utils.CommaJoin(utils.NiceSort(missing_file)))
1808
1809       # See if there are multiple versions of the file
1810       test = len(checksums) > 1
1811       if test:
1812         variants = ["variant %s on %s" %
1813                     (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
1814                     for (idx, (checksum, nodes)) in
1815                       enumerate(sorted(checksums.items()))]
1816       else:
1817         variants = []
1818
1819       errorif(test, cls.ECLUSTERFILECHECK, None,
1820               "File %s found with %s different checksums (%s)",
1821               filename, len(checksums), "; ".join(variants))
1822
1823   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1824                       drbd_map):
1825     """Verifies and the node DRBD status.
1826
1827     @type ninfo: L{objects.Node}
1828     @param ninfo: the node to check
1829     @param nresult: the remote results for the node
1830     @param instanceinfo: the dict of instances
1831     @param drbd_helper: the configured DRBD usermode helper
1832     @param drbd_map: the DRBD map as returned by
1833         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1834
1835     """
1836     node = ninfo.name
1837     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1838
1839     if drbd_helper:
1840       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1841       test = (helper_result == None)
1842       _ErrorIf(test, self.ENODEDRBDHELPER, node,
1843                "no drbd usermode helper returned")
1844       if helper_result:
1845         status, payload = helper_result
1846         test = not status
1847         _ErrorIf(test, self.ENODEDRBDHELPER, node,
1848                  "drbd usermode helper check unsuccessful: %s", payload)
1849         test = status and (payload != drbd_helper)
1850         _ErrorIf(test, self.ENODEDRBDHELPER, node,
1851                  "wrong drbd usermode helper: %s", payload)
1852
1853     # compute the DRBD minors
1854     node_drbd = {}
1855     for minor, instance in drbd_map[node].items():
1856       test = instance not in instanceinfo
1857       _ErrorIf(test, self.ECLUSTERCFG, None,
1858                "ghost instance '%s' in temporary DRBD map", instance)
1859         # ghost instance should not be running, but otherwise we
1860         # don't give double warnings (both ghost instance and
1861         # unallocated minor in use)
1862       if test:
1863         node_drbd[minor] = (instance, False)
1864       else:
1865         instance = instanceinfo[instance]
1866         node_drbd[minor] = (instance.name, instance.admin_up)
1867
1868     # and now check them
1869     used_minors = nresult.get(constants.NV_DRBDLIST, [])
1870     test = not isinstance(used_minors, (tuple, list))
1871     _ErrorIf(test, self.ENODEDRBD, node,
1872              "cannot parse drbd status file: %s", str(used_minors))
1873     if test:
1874       # we cannot check drbd status
1875       return
1876
1877     for minor, (iname, must_exist) in node_drbd.items():
1878       test = minor not in used_minors and must_exist
1879       _ErrorIf(test, self.ENODEDRBD, node,
1880                "drbd minor %d of instance %s is not active", minor, iname)
1881     for minor in used_minors:
1882       test = minor not in node_drbd
1883       _ErrorIf(test, self.ENODEDRBD, node,
1884                "unallocated drbd minor %d is in use", minor)
1885
1886   def _UpdateNodeOS(self, ninfo, nresult, nimg):
1887     """Builds the node OS structures.
1888
1889     @type ninfo: L{objects.Node}
1890     @param ninfo: the node to check
1891     @param nresult: the remote results for the node
1892     @param nimg: the node image object
1893
1894     """
1895     node = ninfo.name
1896     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1897
1898     remote_os = nresult.get(constants.NV_OSLIST, None)
1899     test = (not isinstance(remote_os, list) or
1900             not compat.all(isinstance(v, list) and len(v) == 7
1901                            for v in remote_os))
1902
1903     _ErrorIf(test, self.ENODEOS, node,
1904              "node hasn't returned valid OS data")
1905
1906     nimg.os_fail = test
1907
1908     if test:
1909       return
1910
1911     os_dict = {}
1912
1913     for (name, os_path, status, diagnose,
1914          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1915
1916       if name not in os_dict:
1917         os_dict[name] = []
1918
1919       # parameters is a list of lists instead of list of tuples due to
1920       # JSON lacking a real tuple type, fix it:
1921       parameters = [tuple(v) for v in parameters]
1922       os_dict[name].append((os_path, status, diagnose,
1923                             set(variants), set(parameters), set(api_ver)))
1924
1925     nimg.oslist = os_dict
1926
1927   def _VerifyNodeOS(self, ninfo, nimg, base):
1928     """Verifies the node OS list.
1929
1930     @type ninfo: L{objects.Node}
1931     @param ninfo: the node to check
1932     @param nimg: the node image object
1933     @param base: the 'template' node we match against (e.g. from the master)
1934
1935     """
1936     node = ninfo.name
1937     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1938
1939     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1940
1941     beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
1942     for os_name, os_data in nimg.oslist.items():
1943       assert os_data, "Empty OS status for OS %s?!" % os_name
1944       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1945       _ErrorIf(not f_status, self.ENODEOS, node,
1946                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1947       _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1948                "OS '%s' has multiple entries (first one shadows the rest): %s",
1949                os_name, utils.CommaJoin([v[0] for v in os_data]))
1950       # this will catched in backend too
1951       _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1952                and not f_var, self.ENODEOS, node,
1953                "OS %s with API at least %d does not declare any variant",
1954                os_name, constants.OS_API_V15)
1955       # comparisons with the 'base' image
1956       test = os_name not in base.oslist
1957       _ErrorIf(test, self.ENODEOS, node,
1958                "Extra OS %s not present on reference node (%s)",
1959                os_name, base.name)
1960       if test:
1961         continue
1962       assert base.oslist[os_name], "Base node has empty OS status?"
1963       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1964       if not b_status:
1965         # base OS is invalid, skipping
1966         continue
1967       for kind, a, b in [("API version", f_api, b_api),
1968                          ("variants list", f_var, b_var),
1969                          ("parameters", beautify_params(f_param),
1970                           beautify_params(b_param))]:
1971         _ErrorIf(a != b, self.ENODEOS, node,
1972                  "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
1973                  kind, os_name, base.name,
1974                  utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
1975
1976     # check any missing OSes
1977     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1978     _ErrorIf(missing, self.ENODEOS, node,
1979              "OSes present on reference node %s but missing on this node: %s",
1980              base.name, utils.CommaJoin(missing))
1981
1982   def _VerifyOob(self, ninfo, nresult):
1983     """Verifies out of band functionality of a node.
1984
1985     @type ninfo: L{objects.Node}
1986     @param ninfo: the node to check
1987     @param nresult: the remote results for the node
1988
1989     """
1990     node = ninfo.name
1991     # We just have to verify the paths on master and/or master candidates
1992     # as the oob helper is invoked on the master
1993     if ((ninfo.master_candidate or ninfo.master_capable) and
1994         constants.NV_OOB_PATHS in nresult):
1995       for path_result in nresult[constants.NV_OOB_PATHS]:
1996         self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
1997
1998   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1999     """Verifies and updates the node volume data.
2000
2001     This function will update a L{NodeImage}'s internal structures
2002     with data from the remote call.
2003
2004     @type ninfo: L{objects.Node}
2005     @param ninfo: the node to check
2006     @param nresult: the remote results for the node
2007     @param nimg: the node image object
2008     @param vg_name: the configured VG name
2009
2010     """
2011     node = ninfo.name
2012     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2013
2014     nimg.lvm_fail = True
2015     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2016     if vg_name is None:
2017       pass
2018     elif isinstance(lvdata, basestring):
2019       _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2020                utils.SafeEncode(lvdata))
2021     elif not isinstance(lvdata, dict):
2022       _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2023     else:
2024       nimg.volumes = lvdata
2025       nimg.lvm_fail = False
2026
2027   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2028     """Verifies and updates the node instance list.
2029
2030     If the listing was successful, then updates this node's instance
2031     list. Otherwise, it marks the RPC call as failed for the instance
2032     list key.
2033
2034     @type ninfo: L{objects.Node}
2035     @param ninfo: the node to check
2036     @param nresult: the remote results for the node
2037     @param nimg: the node image object
2038
2039     """
2040     idata = nresult.get(constants.NV_INSTANCELIST, None)
2041     test = not isinstance(idata, list)
2042     self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2043                   " (instancelist): %s", utils.SafeEncode(str(idata)))
2044     if test:
2045       nimg.hyp_fail = True
2046     else:
2047       nimg.instances = idata
2048
2049   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2050     """Verifies and computes a node information map
2051
2052     @type ninfo: L{objects.Node}
2053     @param ninfo: the node to check
2054     @param nresult: the remote results for the node
2055     @param nimg: the node image object
2056     @param vg_name: the configured VG name
2057
2058     """
2059     node = ninfo.name
2060     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2061
2062     # try to read free memory (from the hypervisor)
2063     hv_info = nresult.get(constants.NV_HVINFO, None)
2064     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2065     _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2066     if not test:
2067       try:
2068         nimg.mfree = int(hv_info["memory_free"])
2069       except (ValueError, TypeError):
2070         _ErrorIf(True, self.ENODERPC, node,
2071                  "node returned invalid nodeinfo, check hypervisor")
2072
2073     # FIXME: devise a free space model for file based instances as well
2074     if vg_name is not None:
2075       test = (constants.NV_VGLIST not in nresult or
2076               vg_name not in nresult[constants.NV_VGLIST])
2077       _ErrorIf(test, self.ENODELVM, node,
2078                "node didn't return data for the volume group '%s'"
2079                " - it is either missing or broken", vg_name)
2080       if not test:
2081         try:
2082           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2083         except (ValueError, TypeError):
2084           _ErrorIf(True, self.ENODERPC, node,
2085                    "node returned invalid LVM info, check LVM status")
2086
2087   def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2088     """Gets per-disk status information for all instances.
2089
2090     @type nodelist: list of strings
2091     @param nodelist: Node names
2092     @type node_image: dict of (name, L{objects.Node})
2093     @param node_image: Node objects
2094     @type instanceinfo: dict of (name, L{objects.Instance})
2095     @param instanceinfo: Instance objects
2096     @rtype: {instance: {node: [(succes, payload)]}}
2097     @return: a dictionary of per-instance dictionaries with nodes as
2098         keys and disk information as values; the disk information is a
2099         list of tuples (success, payload)
2100
2101     """
2102     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2103
2104     node_disks = {}
2105     node_disks_devonly = {}
2106     diskless_instances = set()
2107     diskless = constants.DT_DISKLESS
2108
2109     for nname in nodelist:
2110       node_instances = list(itertools.chain(node_image[nname].pinst,
2111                                             node_image[nname].sinst))
2112       diskless_instances.update(inst for inst in node_instances
2113                                 if instanceinfo[inst].disk_template == diskless)
2114       disks = [(inst, disk)
2115                for inst in node_instances
2116                for disk in instanceinfo[inst].disks]
2117
2118       if not disks:
2119         # No need to collect data
2120         continue
2121
2122       node_disks[nname] = disks
2123
2124       # Creating copies as SetDiskID below will modify the objects and that can
2125       # lead to incorrect data returned from nodes
2126       devonly = [dev.Copy() for (_, dev) in disks]
2127
2128       for dev in devonly:
2129         self.cfg.SetDiskID(dev, nname)
2130
2131       node_disks_devonly[nname] = devonly
2132
2133     assert len(node_disks) == len(node_disks_devonly)
2134
2135     # Collect data from all nodes with disks
2136     result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2137                                                           node_disks_devonly)
2138
2139     assert len(result) == len(node_disks)
2140
2141     instdisk = {}
2142
2143     for (nname, nres) in result.items():
2144       disks = node_disks[nname]
2145
2146       if nres.offline:
2147         # No data from this node
2148         data = len(disks) * [(False, "node offline")]
2149       else:
2150         msg = nres.fail_msg
2151         _ErrorIf(msg, self.ENODERPC, nname,
2152                  "while getting disk information: %s", msg)
2153         if msg:
2154           # No data from this node
2155           data = len(disks) * [(False, msg)]
2156         else:
2157           data = []
2158           for idx, i in enumerate(nres.payload):
2159             if isinstance(i, (tuple, list)) and len(i) == 2:
2160               data.append(i)
2161             else:
2162               logging.warning("Invalid result from node %s, entry %d: %s",
2163                               nname, idx, i)
2164               data.append((False, "Invalid result from the remote node"))
2165
2166       for ((inst, _), status) in zip(disks, data):
2167         instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2168
2169     # Add empty entries for diskless instances.
2170     for inst in diskless_instances:
2171       assert inst not in instdisk
2172       instdisk[inst] = {}
2173
2174     assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2175                       len(nnames) <= len(instanceinfo[inst].all_nodes) and
2176                       compat.all(isinstance(s, (tuple, list)) and
2177                                  len(s) == 2 for s in statuses)
2178                       for inst, nnames in instdisk.items()
2179                       for nname, statuses in nnames.items())
2180     assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2181
2182     return instdisk
2183
2184   def _VerifyHVP(self, hvp_data):
2185     """Verifies locally the syntax of the hypervisor parameters.
2186
2187     """
2188     for item, hv_name, hv_params in hvp_data:
2189       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2190              (item, hv_name))
2191       try:
2192         hv_class = hypervisor.GetHypervisor(hv_name)
2193         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2194         hv_class.CheckParameterSyntax(hv_params)
2195       except errors.GenericError, err:
2196         self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
2197
2198   def BuildHooksEnv(self):
2199     """Build hooks env.
2200
2201     Cluster-Verify hooks just ran in the post phase and their failure makes
2202     the output be logged in the verify output and the verification to fail.
2203
2204     """
2205     cfg = self.cfg
2206
2207     env = {
2208       "CLUSTER_TAGS": " ".join(cfg.GetClusterInfo().GetTags())
2209       }
2210
2211     env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2212                for node in cfg.GetAllNodesInfo().values())
2213
2214     return env
2215
2216   def BuildHooksNodes(self):
2217     """Build hooks nodes.
2218
2219     """
2220     return ([], self.cfg.GetNodeList())
2221
2222   def Exec(self, feedback_fn):
2223     """Verify integrity of cluster, performing various test on nodes.
2224
2225     """
2226     # This method has too many local variables. pylint: disable-msg=R0914
2227     self.bad = False
2228     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2229     verbose = self.op.verbose
2230     self._feedback_fn = feedback_fn
2231     feedback_fn("* Verifying global settings")
2232     for msg in self.cfg.VerifyConfig():
2233       _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2234
2235     # Check the cluster certificates
2236     for cert_filename in constants.ALL_CERT_FILES:
2237       (errcode, msg) = _VerifyCertificate(cert_filename)
2238       _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2239
2240     vg_name = self.cfg.GetVGName()
2241     drbd_helper = self.cfg.GetDRBDHelper()
2242     hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2243     cluster = self.cfg.GetClusterInfo()
2244     nodelist = utils.NiceSort(self.cfg.GetNodeList())
2245     nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2246     nodeinfo_byname = dict(zip(nodelist, nodeinfo))
2247     instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2248     instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2249                         for iname in instancelist)
2250     groupinfo = self.cfg.GetAllNodeGroupsInfo()
2251     i_non_redundant = [] # Non redundant instances
2252     i_non_a_balanced = [] # Non auto-balanced instances
2253     n_offline = 0 # Count of offline nodes
2254     n_drained = 0 # Count of nodes being drained
2255     node_vol_should = {}
2256
2257     # FIXME: verify OS list
2258
2259     # File verification
2260     filemap = _ComputeAncillaryFiles(cluster, False)
2261
2262     # do local checksums
2263     master_node = self.master_node = self.cfg.GetMasterNode()
2264     master_ip = self.cfg.GetMasterIP()
2265
2266     # Compute the set of hypervisor parameters
2267     hvp_data = []
2268     for hv_name in hypervisors:
2269       hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
2270     for os_name, os_hvp in cluster.os_hvp.items():
2271       for hv_name, hv_params in os_hvp.items():
2272         if not hv_params:
2273           continue
2274         full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
2275         hvp_data.append(("os %s" % os_name, hv_name, full_params))
2276     # TODO: collapse identical parameter values in a single one
2277     for instance in instanceinfo.values():
2278       if not instance.hvparams:
2279         continue
2280       hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
2281                        cluster.FillHV(instance)))
2282     # and verify them locally
2283     self._VerifyHVP(hvp_data)
2284
2285     feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2286     node_verify_param = {
2287       constants.NV_FILELIST:
2288         utils.UniqueSequence(filename
2289                              for files in filemap
2290                              for filename in files),
2291       constants.NV_NODELIST: [node.name for node in nodeinfo
2292                               if not node.offline],
2293       constants.NV_HYPERVISOR: hypervisors,
2294       constants.NV_HVPARAMS: hvp_data,
2295       constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2296                                   node.secondary_ip) for node in nodeinfo
2297                                  if not node.offline],
2298       constants.NV_INSTANCELIST: hypervisors,
2299       constants.NV_VERSION: None,
2300       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2301       constants.NV_NODESETUP: None,
2302       constants.NV_TIME: None,
2303       constants.NV_MASTERIP: (master_node, master_ip),
2304       constants.NV_OSLIST: None,
2305       constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2306       }
2307
2308     if vg_name is not None:
2309       node_verify_param[constants.NV_VGLIST] = None
2310       node_verify_param[constants.NV_LVLIST] = vg_name
2311       node_verify_param[constants.NV_PVLIST] = [vg_name]
2312       node_verify_param[constants.NV_DRBDLIST] = None
2313
2314     if drbd_helper:
2315       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2316
2317     # Build our expected cluster state
2318     node_image = dict((node.name, self.NodeImage(offline=node.offline,
2319                                                  name=node.name,
2320                                                  vm_capable=node.vm_capable))
2321                       for node in nodeinfo)
2322
2323     # Gather OOB paths
2324     oob_paths = []
2325     for node in nodeinfo:
2326       path = _SupportsOob(self.cfg, node)
2327       if path and path not in oob_paths:
2328         oob_paths.append(path)
2329
2330     if oob_paths:
2331       node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2332
2333     for instance in instancelist:
2334       inst_config = instanceinfo[instance]
2335
2336       for nname in inst_config.all_nodes:
2337         if nname not in node_image:
2338           # ghost node
2339           gnode = self.NodeImage(name=nname)
2340           gnode.ghost = True
2341           node_image[nname] = gnode
2342
2343       inst_config.MapLVsByNode(node_vol_should)
2344
2345       pnode = inst_config.primary_node
2346       node_image[pnode].pinst.append(instance)
2347
2348       for snode in inst_config.secondary_nodes:
2349         nimg = node_image[snode]
2350         nimg.sinst.append(instance)
2351         if pnode not in nimg.sbp:
2352           nimg.sbp[pnode] = []
2353         nimg.sbp[pnode].append(instance)
2354
2355     # At this point, we have the in-memory data structures complete,
2356     # except for the runtime information, which we'll gather next
2357
2358     # Due to the way our RPC system works, exact response times cannot be
2359     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2360     # time before and after executing the request, we can at least have a time
2361     # window.
2362     nvinfo_starttime = time.time()
2363     all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2364                                            self.cfg.GetClusterName())
2365     nvinfo_endtime = time.time()
2366
2367     all_drbd_map = self.cfg.ComputeDRBDMap()
2368
2369     feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
2370     instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)
2371
2372     feedback_fn("* Verifying configuration file consistency")
2373     self._VerifyFiles(_ErrorIf, nodeinfo, master_node, all_nvinfo, filemap)
2374
2375     feedback_fn("* Verifying node status")
2376
2377     refos_img = None
2378
2379     for node_i in nodeinfo:
2380       node = node_i.name
2381       nimg = node_image[node]
2382
2383       if node_i.offline:
2384         if verbose:
2385           feedback_fn("* Skipping offline node %s" % (node,))
2386         n_offline += 1
2387         continue
2388
2389       if node == master_node:
2390         ntype = "master"
2391       elif node_i.master_candidate:
2392         ntype = "master candidate"
2393       elif node_i.drained:
2394         ntype = "drained"
2395         n_drained += 1
2396       else:
2397         ntype = "regular"
2398       if verbose:
2399         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2400
2401       msg = all_nvinfo[node].fail_msg
2402       _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2403       if msg:
2404         nimg.rpc_fail = True
2405         continue
2406
2407       nresult = all_nvinfo[node].payload
2408
2409       nimg.call_ok = self._VerifyNode(node_i, nresult)
2410       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2411       self._VerifyNodeNetwork(node_i, nresult)
2412       self._VerifyOob(node_i, nresult)
2413
2414       if nimg.vm_capable:
2415         self._VerifyNodeLVM(node_i, nresult, vg_name)
2416         self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2417                              all_drbd_map)
2418
2419         self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2420         self._UpdateNodeInstances(node_i, nresult, nimg)
2421         self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2422         self._UpdateNodeOS(node_i, nresult, nimg)
2423         if not nimg.os_fail:
2424           if refos_img is None:
2425             refos_img = nimg
2426           self._VerifyNodeOS(node_i, nimg, refos_img)
2427
2428     feedback_fn("* Verifying instance status")
2429     for instance in instancelist:
2430       if verbose:
2431         feedback_fn("* Verifying instance %s" % instance)
2432       inst_config = instanceinfo[instance]
2433       self._VerifyInstance(instance, inst_config, node_image,
2434                            instdisk[instance])
2435       inst_nodes_offline = []
2436
2437       pnode = inst_config.primary_node
2438       pnode_img = node_image[pnode]
2439       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2440                self.ENODERPC, pnode, "instance %s, connection to"
2441                " primary node failed", instance)
2442
2443       _ErrorIf(inst_config.admin_up and pnode_img.offline,
2444                self.EINSTANCEBADNODE, instance,
2445                "instance is marked as running and lives on offline node %s",
2446                inst_config.primary_node)
2447
2448       # If the instance is non-redundant we cannot survive losing its primary
2449       # node, so we are not N+1 compliant. On the other hand we have no disk
2450       # templates with more than one secondary so that situation is not well
2451       # supported either.
2452       # FIXME: does not support file-backed instances
2453       if not inst_config.secondary_nodes:
2454         i_non_redundant.append(instance)
2455
2456       _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2457                instance, "instance has multiple secondary nodes: %s",
2458                utils.CommaJoin(inst_config.secondary_nodes),
2459                code=self.ETYPE_WARNING)
2460
2461       if inst_config.disk_template in constants.DTS_INT_MIRROR:
2462         pnode = inst_config.primary_node
2463         instance_nodes = utils.NiceSort(inst_config.all_nodes)
2464         instance_groups = {}
2465
2466         for node in instance_nodes:
2467           instance_groups.setdefault(nodeinfo_byname[node].group,
2468                                      []).append(node)
2469
2470         pretty_list = [
2471           "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2472           # Sort so that we always list the primary node first.
2473           for group, nodes in sorted(instance_groups.items(),
2474                                      key=lambda (_, nodes): pnode in nodes,
2475                                      reverse=True)]
2476
2477         self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2478                       instance, "instance has primary and secondary nodes in"
2479                       " different groups: %s", utils.CommaJoin(pretty_list),
2480                       code=self.ETYPE_WARNING)
2481
2482       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2483         i_non_a_balanced.append(instance)
2484
2485       for snode in inst_config.secondary_nodes:
2486         s_img = node_image[snode]
2487         _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2488                  "instance %s, connection to secondary node failed", instance)
2489
2490         if s_img.offline:
2491           inst_nodes_offline.append(snode)
2492
2493       # warn that the instance lives on offline nodes
2494       _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2495                "instance has offline secondary node(s) %s",
2496                utils.CommaJoin(inst_nodes_offline))
2497       # ... or ghost/non-vm_capable nodes
2498       for node in inst_config.all_nodes:
2499         _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2500                  "instance lives on ghost node %s", node)
2501         _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2502                  instance, "instance lives on non-vm_capable node %s", node)
2503
2504     feedback_fn("* Verifying orphan volumes")
2505     reserved = utils.FieldSet(*cluster.reserved_lvs)
2506     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2507
2508     feedback_fn("* Verifying orphan instances")
2509     self._VerifyOrphanInstances(instancelist, node_image)
2510
2511     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2512       feedback_fn("* Verifying N+1 Memory redundancy")
2513       self._VerifyNPlusOneMemory(node_image, instanceinfo)
2514
2515     feedback_fn("* Other Notes")
2516     if i_non_redundant:
2517       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2518                   % len(i_non_redundant))
2519
2520     if i_non_a_balanced:
2521       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2522                   % len(i_non_a_balanced))
2523
2524     if n_offline:
2525       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2526
2527     if n_drained:
2528       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2529
2530     return not self.bad
2531
2532   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2533     """Analyze the post-hooks' result
2534
2535     This method analyses the hook result, handles it, and sends some
2536     nicely-formatted feedback back to the user.
2537
2538     @param phase: one of L{constants.HOOKS_PHASE_POST} or
2539         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2540     @param hooks_results: the results of the multi-node hooks rpc call
2541     @param feedback_fn: function used send feedback back to the caller
2542     @param lu_result: previous Exec result
2543     @return: the new Exec result, based on the previous result
2544         and hook results
2545
2546     """
2547     # We only really run POST phase hooks, and are only interested in
2548     # their results
2549     if phase == constants.HOOKS_PHASE_POST:
2550       # Used to change hooks' output to proper indentation
2551       feedback_fn("* Hooks Results")
2552       assert hooks_results, "invalid result from hooks"
2553
2554       for node_name in hooks_results:
2555         res = hooks_results[node_name]
2556         msg = res.fail_msg
2557         test = msg and not res.offline
2558         self._ErrorIf(test, self.ENODEHOOKS, node_name,
2559                       "Communication failure in hooks execution: %s", msg)
2560         if res.offline or msg:
2561           # No need to investigate payload if node is offline or gave an error.
2562           # override manually lu_result here as _ErrorIf only
2563           # overrides self.bad
2564           lu_result = 1
2565           continue
2566         for script, hkr, output in res.payload:
2567           test = hkr == constants.HKR_FAIL
2568           self._ErrorIf(test, self.ENODEHOOKS, node_name,
2569                         "Script %s failed, output:", script)
2570           if test:
2571             output = self._HOOKS_INDENT_RE.sub('      ', output)
2572             feedback_fn("%s" % output)
2573             lu_result = 0
2574
2575       return lu_result
2576
2577
2578 class LUClusterVerifyDisks(NoHooksLU):
2579   """Verifies the cluster disks status.
2580
2581   """
2582   REQ_BGL = False
2583
2584   def ExpandNames(self):
2585     self.needed_locks = {
2586       locking.LEVEL_NODE: locking.ALL_SET,
2587       locking.LEVEL_INSTANCE: locking.ALL_SET,
2588     }
2589     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
2590
2591   def Exec(self, feedback_fn):
2592     """Verify integrity of cluster disks.
2593
2594     @rtype: tuple of three items
2595     @return: a tuple of (dict of node-to-node_error, list of instances
2596         which need activate-disks, dict of instance: (node, volume) for
2597         missing volumes
2598
2599     """
2600     result = res_nodes, res_instances, res_missing = {}, [], {}
2601
2602     nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
2603     instances = self.cfg.GetAllInstancesInfo().values()
2604
2605     nv_dict = {}
2606     for inst in instances:
2607       inst_lvs = {}
2608       if not inst.admin_up:
2609         continue
2610       inst.MapLVsByNode(inst_lvs)
2611       # transform { iname: {node: [vol,],},} to {(node, vol): iname}
2612       for node, vol_list in inst_lvs.iteritems():
2613         for vol in vol_list:
2614           nv_dict[(node, vol)] = inst
2615
2616     if not nv_dict:
2617       return result
2618
2619     node_lvs = self.rpc.call_lv_list(nodes, [])
2620     for node, node_res in node_lvs.items():
2621       if node_res.offline:
2622         continue
2623       msg = node_res.fail_msg
2624       if msg:
2625         logging.warning("Error enumerating LVs on node %s: %s", node, msg)
2626         res_nodes[node] = msg
2627         continue
2628
2629       lvs = node_res.payload
2630       for lv_name, (_, _, lv_online) in lvs.items():
2631         inst = nv_dict.pop((node, lv_name), None)
2632         if (not lv_online and inst is not None
2633             and inst.name not in res_instances):
2634           res_instances.append(inst.name)
2635
2636     # any leftover items in nv_dict are missing LVs, let's arrange the
2637     # data better
2638     for key, inst in nv_dict.iteritems():
2639       if inst.name not in res_missing:
2640         res_missing[inst.name] = []
2641       res_missing[inst.name].append(key)
2642
2643     return result
2644
2645
2646 class LUClusterRepairDiskSizes(NoHooksLU):
2647   """Verifies the cluster disks sizes.
2648
2649   """
2650   REQ_BGL = False
2651
2652   def ExpandNames(self):
2653     if self.op.instances:
2654       self.wanted_names = []
2655       for name in self.op.instances:
2656         full_name = _ExpandInstanceName(self.cfg, name)
2657         self.wanted_names.append(full_name)
2658       self.needed_locks = {
2659         locking.LEVEL_NODE: [],
2660         locking.LEVEL_INSTANCE: self.wanted_names,
2661         }
2662       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
2663     else:
2664       self.wanted_names = None
2665       self.needed_locks = {
2666         locking.LEVEL_NODE: locking.ALL_SET,
2667         locking.LEVEL_INSTANCE: locking.ALL_SET,
2668         }
2669     self.share_locks = dict(((i, 1) for i in locking.LEVELS))
2670
2671   def DeclareLocks(self, level):
2672     if level == locking.LEVEL_NODE and self.wanted_names is not None:
2673       self._LockInstancesNodes(primary_only=True)
2674
2675   def CheckPrereq(self):
2676     """Check prerequisites.
2677
2678     This only checks the optional instance list against the existing names.
2679
2680     """
2681     if self.wanted_names is None:
2682       self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
2683
2684     self.wanted_instances = [self.cfg.GetInstanceInfo(name) for name
2685                              in self.wanted_names]
2686
2687   def _EnsureChildSizes(self, disk):
2688     """Ensure children of the disk have the needed disk size.
2689
2690     This is valid mainly for DRBD8 and fixes an issue where the
2691     children have smaller disk size.
2692
2693     @param disk: an L{ganeti.objects.Disk} object
2694
2695     """
2696     if disk.dev_type == constants.LD_DRBD8:
2697       assert disk.children, "Empty children for DRBD8?"
2698       fchild = disk.children[0]
2699       mismatch = fchild.size < disk.size
2700       if mismatch:
2701         self.LogInfo("Child disk has size %d, parent %d, fixing",
2702                      fchild.size, disk.size)
2703         fchild.size = disk.size
2704
2705       # and we recurse on this child only, not on the metadev
2706       return self._EnsureChildSizes(fchild) or mismatch
2707     else:
2708       return False
2709
2710   def Exec(self, feedback_fn):
2711     """Verify the size of cluster disks.
2712
2713     """
2714     # TODO: check child disks too
2715     # TODO: check differences in size between primary/secondary nodes
2716     per_node_disks = {}
2717     for instance in self.wanted_instances:
2718       pnode = instance.primary_node
2719       if pnode not in per_node_disks:
2720         per_node_disks[pnode] = []
2721       for idx, disk in enumerate(instance.disks):
2722         per_node_disks[pnode].append((instance, idx, disk))
2723
2724     changed = []
2725     for node, dskl in per_node_disks.items():
2726       newl = [v[2].Copy() for v in dskl]
2727       for dsk in newl:
2728         self.cfg.SetDiskID(dsk, node)
2729       result = self.rpc.call_blockdev_getsize(node, newl)
2730       if result.fail_msg:
2731         self.LogWarning("Failure in blockdev_getsize call to node"
2732                         " %s, ignoring", node)
2733         continue
2734       if len(result.payload) != len(dskl):
2735         logging.warning("Invalid result from node %s: len(dksl)=%d,"
2736                         " result.payload=%s", node, len(dskl), result.payload)
2737         self.LogWarning("Invalid result from node %s, ignoring node results",
2738                         node)
2739         continue
2740       for ((instance, idx, disk), size) in zip(dskl, result.payload):
2741         if size is None:
2742           self.LogWarning("Disk %d of instance %s did not return size"
2743                           " information, ignoring", idx, instance.name)
2744           continue
2745         if not isinstance(size, (int, long)):
2746           self.LogWarning("Disk %d of instance %s did not return valid"
2747                           " size information, ignoring", idx, instance.name)
2748           continue
2749         size = size >> 20
2750         if size != disk.size:
2751           self.LogInfo("Disk %d of instance %s has mismatched size,"
2752                        " correcting: recorded %d, actual %d", idx,
2753                        instance.name, disk.size, size)
2754           disk.size = size
2755           self.cfg.Update(instance, feedback_fn)
2756           changed.append((instance.name, idx, size))
2757         if self._EnsureChildSizes(disk):
2758           self.cfg.Update(instance, feedback_fn)
2759           changed.append((instance.name, idx, disk.size))
2760     return changed
2761
2762
2763 class LUClusterRename(LogicalUnit):
2764   """Rename the cluster.
2765
2766   """
2767   HPATH = "cluster-rename"
2768   HTYPE = constants.HTYPE_CLUSTER
2769
2770   def BuildHooksEnv(self):
2771     """Build hooks env.
2772
2773     """
2774     return {
2775       "OP_TARGET": self.cfg.GetClusterName(),
2776       "NEW_NAME": self.op.name,
2777       }
2778
2779   def BuildHooksNodes(self):
2780     """Build hooks nodes.
2781
2782     """
2783     return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
2784
2785   def CheckPrereq(self):
2786     """Verify that the passed name is a valid one.
2787
2788     """
2789     hostname = netutils.GetHostname(name=self.op.name,
2790                                     family=self.cfg.GetPrimaryIPFamily())
2791
2792     new_name = hostname.name
2793     self.ip = new_ip = hostname.ip
2794     old_name = self.cfg.GetClusterName()
2795     old_ip = self.cfg.GetMasterIP()
2796     if new_name == old_name and new_ip == old_ip:
2797       raise errors.OpPrereqError("Neither the name nor the IP address of the"
2798                                  " cluster has changed",
2799                                  errors.ECODE_INVAL)
2800     if new_ip != old_ip:
2801       if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
2802         raise errors.OpPrereqError("The given cluster IP address (%s) is"
2803                                    " reachable on the network" %
2804                                    new_ip, errors.ECODE_NOTUNIQUE)
2805
2806     self.op.name = new_name
2807
2808   def Exec(self, feedback_fn):
2809     """Rename the cluster.
2810
2811     """
2812     clustername = self.op.name
2813     ip = self.ip
2814
2815     # shutdown the master IP
2816     master = self.cfg.GetMasterNode()
2817     result = self.rpc.call_node_stop_master(master, False)
2818     result.Raise("Could not disable the master role")
2819
2820     try:
2821       cluster = self.cfg.GetClusterInfo()
2822       cluster.cluster_name = clustername
2823       cluster.master_ip = ip
2824       self.cfg.Update(cluster, feedback_fn)
2825
2826       # update the known hosts file
2827       ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
2828       node_list = self.cfg.GetOnlineNodeList()
2829       try:
2830         node_list.remove(master)
2831       except ValueError:
2832         pass
2833       _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
2834     finally:
2835       result = self.rpc.call_node_start_master(master, False, False)
2836       msg = result.fail_msg
2837       if msg:
2838         self.LogWarning("Could not re-enable the master role on"
2839                         " the master, please restart manually: %s", msg)
2840
2841     return clustername
2842
2843
2844 class LUClusterSetParams(LogicalUnit):
2845   """Change the parameters of the cluster.
2846
2847   """
2848   HPATH = "cluster-modify"
2849   HTYPE = constants.HTYPE_CLUSTER
2850   REQ_BGL = False
2851
2852   def CheckArguments(self):
2853     """Check parameters
2854
2855     """
2856     if self.op.uid_pool:
2857       uidpool.CheckUidPool(self.op.uid_pool)
2858
2859     if self.op.add_uids:
2860       uidpool.CheckUidPool(self.op.add_uids)
2861
2862     if self.op.remove_uids:
2863       uidpool.CheckUidPool(self.op.remove_uids)
2864
2865   def ExpandNames(self):
2866     # FIXME: in the future maybe other cluster params won't require checking on
2867     # all nodes to be modified.
2868     self.needed_locks = {
2869       locking.LEVEL_NODE: locking.ALL_SET,
2870     }
2871     self.share_locks[locking.LEVEL_NODE] = 1
2872
2873   def BuildHooksEnv(self):
2874     """Build hooks env.
2875
2876     """
2877     return {
2878       "OP_TARGET": self.cfg.GetClusterName(),
2879       "NEW_VG_NAME": self.op.vg_name,
2880       }
2881
2882   def BuildHooksNodes(self):
2883     """Build hooks nodes.
2884
2885     """
2886     mn = self.cfg.GetMasterNode()
2887     return ([mn], [mn])
2888
2889   def CheckPrereq(self):
2890     """Check prerequisites.
2891
2892     This checks whether the given params don't conflict and
2893     if the given volume group is valid.
2894
2895     """
2896     if self.op.vg_name is not None and not self.op.vg_name:
2897       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2898         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2899                                    " instances exist", errors.ECODE_INVAL)
2900
2901     if self.op.drbd_helper is not None and not self.op.drbd_helper:
2902       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2903         raise errors.OpPrereqError("Cannot disable drbd helper while"
2904                                    " drbd-based instances exist",
2905                                    errors.ECODE_INVAL)
2906
2907     node_list = self.acquired_locks[locking.LEVEL_NODE]
2908
2909     # if vg_name not None, checks given volume group on all nodes
2910     if self.op.vg_name:
2911       vglist = self.rpc.call_vg_list(node_list)
2912       for node in node_list:
2913         msg = vglist[node].fail_msg
2914         if msg:
2915           # ignoring down node
2916           self.LogWarning("Error while gathering data on node %s"
2917                           " (ignoring node): %s", node, msg)
2918           continue
2919         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2920                                               self.op.vg_name,
2921                                               constants.MIN_VG_SIZE)
2922         if vgstatus:
2923           raise errors.OpPrereqError("Error on node '%s': %s" %
2924                                      (node, vgstatus), errors.ECODE_ENVIRON)
2925
2926     if self.op.drbd_helper:
2927       # checks given drbd helper on all nodes
2928       helpers = self.rpc.call_drbd_helper(node_list)
2929       for node in node_list:
2930         ninfo = self.cfg.GetNodeInfo(node)
2931         if ninfo.offline:
2932           self.LogInfo("Not checking drbd helper on offline node %s", node)
2933           continue
2934         msg = helpers[node].fail_msg
2935         if msg:
2936           raise errors.OpPrereqError("Error checking drbd helper on node"
2937                                      " '%s': %s" % (node, msg),
2938                                      errors.ECODE_ENVIRON)
2939         node_helper = helpers[node].payload
2940         if node_helper != self.op.drbd_helper:
2941           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2942                                      (node, node_helper), errors.ECODE_ENVIRON)
2943
2944     self.cluster = cluster = self.cfg.GetClusterInfo()
2945     # validate params changes
2946     if self.op.beparams:
2947       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2948       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2949
2950     if self.op.ndparams:
2951       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
2952       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
2953
2954       # TODO: we need a more general way to handle resetting
2955       # cluster-level parameters to default values
2956       if self.new_ndparams["oob_program"] == "":
2957         self.new_ndparams["oob_program"] = \
2958             constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
2959
2960     if self.op.nicparams:
2961       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2962       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2963       objects.NIC.CheckParameterSyntax(self.new_nicparams)
2964       nic_errors = []
2965
2966       # check all instances for consistency
2967       for instance in self.cfg.GetAllInstancesInfo().values():
2968         for nic_idx, nic in enumerate(instance.nics):
2969           params_copy = copy.deepcopy(nic.nicparams)
2970           params_filled = objects.FillDict(self.new_nicparams, params_copy)
2971
2972           # check parameter syntax
2973           try:
2974             objects.NIC.CheckParameterSyntax(params_filled)
2975           except errors.ConfigurationError, err:
2976             nic_errors.append("Instance %s, nic/%d: %s" %
2977                               (instance.name, nic_idx, err))
2978
2979           # if we're moving instances to routed, check that they have an ip
2980           target_mode = params_filled[constants.NIC_MODE]
2981           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2982             nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2983                               (instance.name, nic_idx))
2984       if nic_errors:
2985         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2986                                    "\n".join(nic_errors))
2987
2988     # hypervisor list/parameters
2989     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2990     if self.op.hvparams:
2991       for hv_name, hv_dict in self.op.hvparams.items():
2992         if hv_name not in self.new_hvparams:
2993           self.new_hvparams[hv_name] = hv_dict
2994         else:
2995           self.new_hvparams[hv_name].update(hv_dict)
2996
2997     # os hypervisor parameters
2998     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2999     if self.op.os_hvp:
3000       for os_name, hvs in self.op.os_hvp.items():
3001         if os_name not in self.new_os_hvp:
3002           self.new_os_hvp[os_name] = hvs
3003         else:
3004           for hv_name, hv_dict in hvs.items():
3005             if hv_name not in self.new_os_hvp[os_name]:
3006               self.new_os_hvp[os_name][hv_name] = hv_dict
3007             else:
3008               self.new_os_hvp[os_name][hv_name].update(hv_dict)
3009
3010     # os parameters
3011     self.new_osp = objects.FillDict(cluster.osparams, {})
3012     if self.op.osparams:
3013       for os_name, osp in self.op.osparams.items():
3014         if os_name not in self.new_osp:
3015           self.new_osp[os_name] = {}
3016
3017         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3018                                                   use_none=True)
3019
3020         if not self.new_osp[os_name]:
3021           # we removed all parameters
3022           del self.new_osp[os_name]
3023         else:
3024           # check the parameter validity (remote check)
3025           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3026                          os_name, self.new_osp[os_name])
3027
3028     # changes to the hypervisor list
3029     if self.op.enabled_hypervisors is not None:
3030       self.hv_list = self.op.enabled_hypervisors
3031       for hv in self.hv_list:
3032         # if the hypervisor doesn't already exist in the cluster
3033         # hvparams, we initialize it to empty, and then (in both
3034         # cases) we make sure to fill the defaults, as we might not
3035         # have a complete defaults list if the hypervisor wasn't
3036         # enabled before
3037         if hv not in new_hvp:
3038           new_hvp[hv] = {}
3039         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3040         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3041     else:
3042       self.hv_list = cluster.enabled_hypervisors
3043
3044     if self.op.hvparams or self.op.enabled_hypervisors is not None:
3045       # either the enabled list has changed, or the parameters have, validate
3046       for hv_name, hv_params in self.new_hvparams.items():
3047         if ((self.op.hvparams and hv_name in self.op.hvparams) or
3048             (self.op.enabled_hypervisors and
3049              hv_name in self.op.enabled_hypervisors)):
3050           # either this is a new hypervisor, or its parameters have changed
3051           hv_class = hypervisor.GetHypervisor(hv_name)
3052           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3053           hv_class.CheckParameterSyntax(hv_params)
3054           _CheckHVParams(self, node_list, hv_name, hv_params)
3055
3056     if self.op.os_hvp:
3057       # no need to check any newly-enabled hypervisors, since the
3058       # defaults have already been checked in the above code-block
3059       for os_name, os_hvp in self.new_os_hvp.items():
3060         for hv_name, hv_params in os_hvp.items():
3061           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3062           # we need to fill in the new os_hvp on top of the actual hv_p
3063           cluster_defaults = self.new_hvparams.get(hv_name, {})
3064           new_osp = objects.FillDict(cluster_defaults, hv_params)
3065           hv_class = hypervisor.GetHypervisor(hv_name)
3066           hv_class.CheckParameterSyntax(new_osp)
3067           _CheckHVParams(self, node_list, hv_name, new_osp)
3068
3069     if self.op.default_iallocator:
3070       alloc_script = utils.FindFile(self.op.default_iallocator,
3071                                     constants.IALLOCATOR_SEARCH_PATH,
3072                                     os.path.isfile)
3073       if alloc_script is None:
3074         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3075                                    " specified" % self.op.default_iallocator,
3076                                    errors.ECODE_INVAL)
3077
3078   def Exec(self, feedback_fn):
3079     """Change the parameters of the cluster.
3080
3081     """
3082     if self.op.vg_name is not None:
3083       new_volume = self.op.vg_name
3084       if not new_volume:
3085         new_volume = None
3086       if new_volume != self.cfg.GetVGName():
3087         self.cfg.SetVGName(new_volume)
3088       else:
3089         feedback_fn("Cluster LVM configuration already in desired"
3090                     " state, not changing")
3091     if self.op.drbd_helper is not None:
3092       new_helper = self.op.drbd_helper
3093       if not new_helper:
3094         new_helper = None
3095       if new_helper != self.cfg.GetDRBDHelper():
3096         self.cfg.SetDRBDHelper(new_helper)
3097       else:
3098         feedback_fn("Cluster DRBD helper already in desired state,"
3099                     " not changing")
3100     if self.op.hvparams:
3101       self.cluster.hvparams = self.new_hvparams
3102     if self.op.os_hvp:
3103       self.cluster.os_hvp = self.new_os_hvp
3104     if self.op.enabled_hypervisors is not None:
3105       self.cluster.hvparams = self.new_hvparams
3106       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3107     if self.op.beparams:
3108       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3109     if self.op.nicparams:
3110       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3111     if self.op.osparams:
3112       self.cluster.osparams = self.new_osp
3113     if self.op.ndparams:
3114       self.cluster.ndparams = self.new_ndparams
3115
3116     if self.op.candidate_pool_size is not None:
3117       self.cluster.candidate_pool_size = self.op.candidate_pool_size
3118       # we need to update the pool size here, otherwise the save will fail
3119       _AdjustCandidatePool(self, [])
3120
3121     if self.op.maintain_node_health is not None:
3122       self.cluster.maintain_node_health = self.op.maintain_node_health
3123
3124     if self.op.prealloc_wipe_disks is not None:
3125       self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3126
3127     if self.op.add_uids is not None:
3128       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3129
3130     if self.op.remove_uids is not None:
3131       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3132
3133     if self.op.uid_pool is not None:
3134       self.cluster.uid_pool = self.op.uid_pool
3135
3136     if self.op.default_iallocator is not None:
3137       self.cluster.default_iallocator = self.op.default_iallocator
3138
3139     if self.op.reserved_lvs is not None:
3140       self.cluster.reserved_lvs = self.op.reserved_lvs
3141
3142     def helper_os(aname, mods, desc):
3143       desc += " OS list"
3144       lst = getattr(self.cluster, aname)
3145       for key, val in mods:
3146         if key == constants.DDM_ADD:
3147           if val in lst:
3148             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3149           else:
3150             lst.append(val)
3151         elif key == constants.DDM_REMOVE:
3152           if val in lst:
3153             lst.remove(val)
3154           else:
3155             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3156         else:
3157           raise errors.ProgrammerError("Invalid modification '%s'" % key)
3158
3159     if self.op.hidden_os:
3160       helper_os("hidden_os", self.op.hidden_os, "hidden")
3161
3162     if self.op.blacklisted_os:
3163       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3164
3165     if self.op.master_netdev:
3166       master = self.cfg.GetMasterNode()
3167       feedback_fn("Shutting down master ip on the current netdev (%s)" %
3168                   self.cluster.master_netdev)
3169       result = self.rpc.call_node_stop_master(master, False)
3170       result.Raise("Could not disable the master ip")
3171       feedback_fn("Changing master_netdev from %s to %s" %
3172                   (self.cluster.master_netdev, self.op.master_netdev))
3173       self.cluster.master_netdev = self.op.master_netdev
3174
3175     self.cfg.Update(self.cluster, feedback_fn)
3176
3177     if self.op.master_netdev:
3178       feedback_fn("Starting the master ip on the new master netdev (%s)" %
3179                   self.op.master_netdev)
3180       result = self.rpc.call_node_start_master(master, False, False)
3181       if result.fail_msg:
3182         self.LogWarning("Could not re-enable the master ip on"
3183                         " the master, please restart manually: %s",
3184                         result.fail_msg)
3185
3186
3187 def _UploadHelper(lu, nodes, fname):
3188   """Helper for uploading a file and showing warnings.
3189
3190   """
3191   if os.path.exists(fname):
3192     result = lu.rpc.call_upload_file(nodes, fname)
3193     for to_node, to_result in result.items():
3194       msg = to_result.fail_msg
3195       if msg:
3196         msg = ("Copy of file %s to node %s failed: %s" %
3197                (fname, to_node, msg))
3198         lu.proc.LogWarning(msg)
3199
3200
3201 def _ComputeAncillaryFiles(cluster, redist):
3202   """Compute files external to Ganeti which need to be consistent.
3203
3204   @type redist: boolean
3205   @param redist: Whether to include files which need to be redistributed
3206
3207   """
3208   # Compute files for all nodes
3209   files_all = set([
3210     constants.SSH_KNOWN_HOSTS_FILE,
3211     constants.CONFD_HMAC_KEY,
3212     constants.CLUSTER_DOMAIN_SECRET_FILE,
3213     ])
3214
3215   if not redist:
3216     files_all.update(constants.ALL_CERT_FILES)
3217     files_all.update(ssconf.SimpleStore().GetFileList())
3218
3219   if cluster.modify_etc_hosts:
3220     files_all.add(constants.ETC_HOSTS)
3221
3222   # Files which must either exist on all nodes or on none
3223   files_all_opt = set([
3224     constants.RAPI_USERS_FILE,
3225     ])
3226
3227   # Files which should only be on master candidates
3228   files_mc = set()
3229   if not redist:
3230     files_mc.add(constants.CLUSTER_CONF_FILE)
3231
3232   # Files which should only be on VM-capable nodes
3233   files_vm = set(filename
3234     for hv_name in cluster.enabled_hypervisors
3235     for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
3236
3237   # Filenames must be unique
3238   assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3239           sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3240          "Found file listed in more than one file list"
3241
3242   return (files_all, files_all_opt, files_mc, files_vm)
3243
3244
3245 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3246   """Distribute additional files which are part of the cluster configuration.
3247
3248   ConfigWriter takes care of distributing the config and ssconf files, but
3249   there are more files which should be distributed to all nodes. This function
3250   makes sure those are copied.
3251
3252   @param lu: calling logical unit
3253   @param additional_nodes: list of nodes not in the config to distribute to
3254   @type additional_vm: boolean
3255   @param additional_vm: whether the additional nodes are vm-capable or not
3256
3257   """
3258   # Gather target nodes
3259   cluster = lu.cfg.GetClusterInfo()
3260   master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3261
3262   online_nodes = lu.cfg.GetOnlineNodeList()
3263   vm_nodes = lu.cfg.GetVmCapableNodeList()
3264
3265   if additional_nodes is not None:
3266     online_nodes.extend(additional_nodes)
3267     if additional_vm:
3268       vm_nodes.extend(additional_nodes)
3269
3270   # Never distribute to master node
3271   for nodelist in [online_nodes, vm_nodes]:
3272     if master_info.name in nodelist:
3273       nodelist.remove(master_info.name)
3274
3275   # Gather file lists
3276   (files_all, files_all_opt, files_mc, files_vm) = \
3277     _ComputeAncillaryFiles(cluster, True)
3278
3279   # Never re-distribute configuration file from here
3280   assert not (constants.CLUSTER_CONF_FILE in files_all or
3281               constants.CLUSTER_CONF_FILE in files_vm)
3282   assert not files_mc, "Master candidates not handled in this function"
3283
3284   filemap = [
3285     (online_nodes, files_all),
3286     (online_nodes, files_all_opt),
3287     (vm_nodes, files_vm),
3288     ]
3289
3290   # Upload the files
3291   for (node_list, files) in filemap:
3292     for fname in files:
3293       _UploadHelper(lu, node_list, fname)
3294
3295
3296 class LUClusterRedistConf(NoHooksLU):
3297   """Force the redistribution of cluster configuration.
3298
3299   This is a very simple LU.
3300
3301   """
3302   REQ_BGL = False
3303
3304   def ExpandNames(self):
3305     self.needed_locks = {
3306       locking.LEVEL_NODE: locking.ALL_SET,
3307     }
3308     self.share_locks[locking.LEVEL_NODE] = 1
3309
3310   def Exec(self, feedback_fn):
3311     """Redistribute the configuration.
3312
3313     """
3314     self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3315     _RedistributeAncillaryFiles(self)
3316
3317
3318 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3319   """Sleep and poll for an instance's disk to sync.
3320
3321   """
3322   if not instance.disks or disks is not None and not disks:
3323     return True
3324
3325   disks = _ExpandCheckDisks(instance, disks)
3326
3327   if not oneshot:
3328     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3329
3330   node = instance.primary_node
3331
3332   for dev in disks:
3333     lu.cfg.SetDiskID(dev, node)
3334
3335   # TODO: Convert to utils.Retry
3336
3337   retries = 0
3338   degr_retries = 10 # in seconds, as we sleep 1 second each time
3339   while True:
3340     max_time = 0
3341     done = True
3342     cumul_degraded = False
3343     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3344     msg = rstats.fail_msg
3345     if msg:
3346       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3347       retries += 1
3348       if retries >= 10:
3349         raise errors.RemoteError("Can't contact node %s for mirror data,"
3350                                  " aborting." % node)
3351       time.sleep(6)
3352       continue
3353     rstats = rstats.payload
3354     retries = 0
3355     for i, mstat in enumerate(rstats):
3356       if mstat is None:
3357         lu.LogWarning("Can't compute data for node %s/%s",
3358                            node, disks[i].iv_name)
3359         continue
3360
3361       cumul_degraded = (cumul_degraded or
3362                         (mstat.is_degraded and mstat.sync_percent is None))
3363       if mstat.sync_percent is not None:
3364         done = False
3365         if mstat.estimated_time is not None:
3366           rem_time = ("%s remaining (estimated)" %
3367                       utils.FormatSeconds(mstat.estimated_time))
3368           max_time = mstat.estimated_time
3369         else:
3370           rem_time = "no time estimate"
3371         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3372                         (disks[i].iv_name, mstat.sync_percent, rem_time))
3373
3374     # if we're done but degraded, let's do a few small retries, to
3375     # make sure we see a stable and not transient situation; therefore
3376     # we force restart of the loop
3377     if (done or oneshot) and cumul_degraded and degr_retries > 0:
3378       logging.info("Degraded disks found, %d retries left", degr_retries)
3379       degr_retries -= 1
3380       time.sleep(1)
3381       continue
3382
3383     if done or oneshot:
3384       break
3385
3386     time.sleep(min(60, max_time))
3387
3388   if done:
3389     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3390   return not cumul_degraded
3391
3392
3393 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3394   """Check that mirrors are not degraded.
3395
3396   The ldisk parameter, if True, will change the test from the
3397   is_degraded attribute (which represents overall non-ok status for
3398   the device(s)) to the ldisk (representing the local storage status).
3399
3400   """
3401   lu.cfg.SetDiskID(dev, node)
3402
3403   result = True
3404
3405   if on_primary or dev.AssembleOnSecondary():
3406     rstats = lu.rpc.call_blockdev_find(node, dev)
3407     msg = rstats.fail_msg
3408     if msg:
3409       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3410       result = False
3411     elif not rstats.payload:
3412       lu.LogWarning("Can't find disk on node %s", node)
3413       result = False
3414     else:
3415       if ldisk:
3416         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3417       else:
3418         result = result and not rstats.payload.is_degraded
3419
3420   if dev.children:
3421     for child in dev.children:
3422       result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3423
3424   return result
3425
3426
3427 class LUOobCommand(NoHooksLU):
3428   """Logical unit for OOB handling.
3429
3430   """
3431   REG_BGL = False
3432   _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3433
3434   def CheckPrereq(self):
3435     """Check prerequisites.
3436
3437     This checks:
3438      - the node exists in the configuration
3439      - OOB is supported
3440
3441     Any errors are signaled by raising errors.OpPrereqError.
3442
3443     """
3444     self.nodes = []
3445     self.master_node = self.cfg.GetMasterNode()
3446
3447     assert self.op.power_delay >= 0.0
3448
3449     if self.op.node_names:
3450       if self.op.command in self._SKIP_MASTER:
3451         if self.master_node in self.op.node_names:
3452           master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3453           master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3454
3455           if master_oob_handler:
3456             additional_text = ("Run '%s %s %s' if you want to operate on the"
3457                                " master regardless") % (master_oob_handler,
3458                                                         self.op.command,
3459                                                         self.master_node)
3460           else:
3461             additional_text = "The master node does not support out-of-band"
3462
3463           raise errors.OpPrereqError(("Operating on the master node %s is not"
3464                                       " allowed for %s\n%s") %
3465                                      (self.master_node, self.op.command,
3466                                       additional_text), errors.ECODE_INVAL)
3467     else:
3468       self.op.node_names = self.cfg.GetNodeList()
3469       if self.op.command in self._SKIP_MASTER:
3470         self.op.node_names.remove(self.master_node)
3471
3472     if self.op.command in self._SKIP_MASTER:
3473       assert self.master_node not in self.op.node_names
3474
3475     for node_name in self.op.node_names:
3476       node = self.cfg.GetNodeInfo(node_name)
3477
3478       if node is None:
3479         raise errors.OpPrereqError("Node %s not found" % node_name,
3480                                    errors.ECODE_NOENT)
3481       else:
3482         self.nodes.append(node)
3483
3484       if (not self.op.ignore_status and
3485           (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3486         raise errors.OpPrereqError(("Cannot power off node %s because it is"
3487                                     " not marked offline") % node_name,
3488                                    errors.ECODE_STATE)
3489
3490   def ExpandNames(self):
3491     """Gather locks we need.
3492
3493     """
3494     if self.op.node_names:
3495       self.op.node_names = [_ExpandNodeName(self.cfg, name)
3496                             for name in self.op.node_names]
3497       lock_names = self.op.node_names
3498     else:
3499       lock_names = locking.ALL_SET
3500
3501     self.needed_locks = {
3502       locking.LEVEL_NODE: lock_names,
3503       }
3504
3505   def Exec(self, feedback_fn):
3506     """Execute OOB and return result if we expect any.
3507
3508     """
3509     master_node = self.master_node
3510     ret = []
3511
3512     for idx, node in enumerate(self.nodes):
3513       node_entry = [(constants.RS_NORMAL, node.name)]
3514       ret.append(node_entry)
3515
3516       oob_program = _SupportsOob(self.cfg, node)
3517
3518       if not oob_program:
3519         node_entry.append((constants.RS_UNAVAIL, None))
3520         continue
3521
3522       logging.info("Executing out-of-band command '%s' using '%s' on %s",
3523                    self.op.command, oob_program, node.name)
3524       result = self.rpc.call_run_oob(master_node, oob_program,
3525                                      self.op.command, node.name,
3526                                      self.op.timeout)
3527
3528       if result.fail_msg:
3529         self.LogWarning("On node '%s' out-of-band RPC failed with: %s",
3530                         node.name, result.fail_msg)
3531         node_entry.append((constants.RS_NODATA, None))
3532       else:
3533         try:
3534           self._CheckPayload(result)
3535         except errors.OpExecError, err:
3536           self.LogWarning("The payload returned by '%s' is not valid: %s",
3537                           node.name, err)
3538           node_entry.append((constants.RS_NODATA, None))
3539         else:
3540           if self.op.command == constants.OOB_HEALTH:
3541             # For health we should log important events
3542             for item, status in result.payload:
3543               if status in [constants.OOB_STATUS_WARNING,
3544                             constants.OOB_STATUS_CRITICAL]:
3545                 self.LogWarning("On node '%s' item '%s' has status '%s'",
3546                                 node.name, item, status)
3547
3548           if self.op.command == constants.OOB_POWER_ON:
3549             node.powered = True
3550           elif self.op.command == constants.OOB_POWER_OFF:
3551             node.powered = False
3552           elif self.op.command == constants.OOB_POWER_STATUS:
3553             powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3554             if powered != node.powered:
3555               logging.warning(("Recorded power state (%s) of node '%s' does not"
3556                                " match actual power state (%s)"), node.powered,
3557                               node.name, powered)
3558
3559           # For configuration changing commands we should update the node
3560           if self.op.command in (constants.OOB_POWER_ON,
3561                                  constants.OOB_POWER_OFF):
3562             self.cfg.Update(node, feedback_fn)
3563
3564           node_entry.append((constants.RS_NORMAL, result.payload))
3565
3566           if (self.op.command == constants.OOB_POWER_ON and
3567               idx < len(self.nodes) - 1):
3568             time.sleep(self.op.power_delay)
3569
3570     return ret
3571
3572   def _CheckPayload(self, result):
3573     """Checks if the payload is valid.
3574
3575     @param result: RPC result
3576     @raises errors.OpExecError: If payload is not valid
3577
3578     """
3579     errs = []
3580     if self.op.command == constants.OOB_HEALTH:
3581       if not isinstance(result.payload, list):
3582         errs.append("command 'health' is expected to return a list but got %s" %
3583                     type(result.payload))
3584       else:
3585         for item, status in result.payload:
3586           if status not in constants.OOB_STATUSES:
3587             errs.append("health item '%s' has invalid status '%s'" %
3588                         (item, status))
3589
3590     if self.op.command == constants.OOB_POWER_STATUS:
3591       if not isinstance(result.payload, dict):
3592         errs.append("power-status is expected to return a dict but got %s" %
3593                     type(result.payload))
3594
3595     if self.op.command in [
3596         constants.OOB_POWER_ON,
3597         constants.OOB_POWER_OFF,
3598         constants.OOB_POWER_CYCLE,
3599         ]:
3600       if result.payload is not None:
3601         errs.append("%s is expected to not return payload but got '%s'" %
3602                     (self.op.command, result.payload))
3603
3604     if errs:
3605       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3606                                utils.CommaJoin(errs))
3607
3608 class _OsQuery(_QueryBase):
3609   FIELDS = query.OS_FIELDS
3610
3611   def ExpandNames(self, lu):
3612     # Lock all nodes in shared mode
3613     # Temporary removal of locks, should be reverted later
3614     # TODO: reintroduce locks when they are lighter-weight
3615     lu.needed_locks = {}
3616     #self.share_locks[locking.LEVEL_NODE] = 1
3617     #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3618
3619     # The following variables interact with _QueryBase._GetNames
3620     if self.names:
3621       self.wanted = self.names
3622     else:
3623       self.wanted = locking.ALL_SET
3624
3625     self.do_locking = self.use_locking
3626
3627   def DeclareLocks(self, lu, level):
3628     pass
3629
3630   @staticmethod
3631   def _DiagnoseByOS(rlist):
3632     """Remaps a per-node return list into an a per-os per-node dictionary
3633
3634     @param rlist: a map with node names as keys and OS objects as values
3635
3636     @rtype: dict
3637     @return: a dictionary with osnames as keys and as value another
3638         map, with nodes as keys and tuples of (path, status, diagnose,
3639         variants, parameters, api_versions) as values, eg::
3640
3641           {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
3642                                      (/srv/..., False, "invalid api")],
3643                            "node2": [(/srv/..., True, "", [], [])]}
3644           }
3645
3646     """
3647     all_os = {}
3648     # we build here the list of nodes that didn't fail the RPC (at RPC
3649     # level), so that nodes with a non-responding node daemon don't
3650     # make all OSes invalid
3651     good_nodes = [node_name for node_name in rlist
3652                   if not rlist[node_name].fail_msg]
3653     for node_name, nr in rlist.items():
3654       if nr.fail_msg or not nr.payload:
3655         continue
3656       for (name, path, status, diagnose, variants,
3657            params, api_versions) in nr.payload:
3658         if name not in all_os:
3659           # build a list of nodes for this os containing empty lists
3660           # for each node in node_list
3661           all_os[name] = {}
3662           for nname in good_nodes:
3663             all_os[name][nname] = []
3664         # convert params from [name, help] to (name, help)
3665         params = [tuple(v) for v in params]
3666         all_os[name][node_name].append((path, status, diagnose,
3667                                         variants, params, api_versions))
3668     return all_os
3669
3670   def _GetQueryData(self, lu):
3671     """Computes the list of nodes and their attributes.
3672
3673     """
3674     # Locking is not used
3675     assert not (lu.acquired_locks or self.do_locking or self.use_locking)
3676
3677     valid_nodes = [node.name
3678                    for node in lu.cfg.GetAllNodesInfo().values()
3679                    if not node.offline and node.vm_capable]
3680     pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
3681     cluster = lu.cfg.GetClusterInfo()
3682
3683     data = {}
3684
3685     for (os_name, os_data) in pol.items():
3686       info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
3687                           hidden=(os_name in cluster.hidden_os),
3688                           blacklisted=(os_name in cluster.blacklisted_os))
3689
3690       variants = set()
3691       parameters = set()
3692       api_versions = set()
3693
3694       for idx, osl in enumerate(os_data.values()):
3695         info.valid = bool(info.valid and osl and osl[0][1])
3696         if not info.valid:
3697           break
3698
3699         (node_variants, node_params, node_api) = osl[0][3:6]
3700         if idx == 0:
3701           # First entry
3702           variants.update(node_variants)
3703           parameters.update(node_params)
3704           api_versions.update(node_api)
3705         else:
3706           # Filter out inconsistent values
3707           variants.intersection_update(node_variants)
3708           parameters.intersection_update(node_params)
3709           api_versions.intersection_update(node_api)
3710
3711       info.variants = list(variants)
3712       info.parameters = list(parameters)
3713       info.api_versions = list(api_versions)
3714
3715       data[os_name] = info
3716
3717     # Prepare data in requested order
3718     return [data[name] for name in self._GetNames(lu, pol.keys(), None)
3719             if name in data]
3720
3721
3722 class LUOsDiagnose(NoHooksLU):
3723   """Logical unit for OS diagnose/query.
3724
3725   """
3726   REQ_BGL = False
3727
3728   @staticmethod
3729   def _BuildFilter(fields, names):
3730     """Builds a filter for querying OSes.
3731
3732     """
3733     name_filter = qlang.MakeSimpleFilter("name", names)
3734
3735     # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
3736     # respective field is not requested
3737     status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
3738                      for fname in ["hidden", "blacklisted"]
3739                      if fname not in fields]
3740     if "valid" not in fields:
3741       status_filter.append([qlang.OP_TRUE, "valid"])
3742
3743     if status_filter:
3744       status_filter.insert(0, qlang.OP_AND)
3745     else:
3746       status_filter = None
3747
3748     if name_filter and status_filter:
3749       return [qlang.OP_AND, name_filter, status_filter]
3750     elif name_filter:
3751       return name_filter
3752     else:
3753       return status_filter
3754
3755   def CheckArguments(self):
3756     self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
3757                        self.op.output_fields, False)
3758
3759   def ExpandNames(self):
3760     self.oq.ExpandNames(self)
3761
3762   def Exec(self, feedback_fn):
3763     return self.oq.OldStyleQuery(self)
3764
3765
3766 class LUNodeRemove(LogicalUnit):
3767   """Logical unit for removing a node.
3768
3769   """
3770   HPATH = "node-remove"
3771   HTYPE = constants.HTYPE_NODE
3772
3773   def BuildHooksEnv(self):
3774     """Build hooks env.
3775
3776     This doesn't run on the target node in the pre phase as a failed
3777     node would then be impossible to remove.
3778
3779     """
3780     return {
3781       "OP_TARGET": self.op.node_name,
3782       "NODE_NAME": self.op.node_name,
3783       }
3784
3785   def BuildHooksNodes(self):
3786     """Build hooks nodes.
3787
3788     """
3789     all_nodes = self.cfg.GetNodeList()
3790     try:
3791       all_nodes.remove(self.op.node_name)
3792     except ValueError:
3793       logging.warning("Node '%s', which is about to be removed, was not found"
3794                       " in the list of all nodes", self.op.node_name)
3795     return (all_nodes, all_nodes)
3796
3797   def CheckPrereq(self):
3798     """Check prerequisites.
3799
3800     This checks:
3801      - the node exists in the configuration
3802      - it does not have primary or secondary instances
3803      - it's not the master
3804
3805     Any errors are signaled by raising errors.OpPrereqError.
3806
3807     """
3808     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
3809     node = self.cfg.GetNodeInfo(self.op.node_name)
3810     assert node is not None
3811
3812     instance_list = self.cfg.GetInstanceList()
3813
3814     masternode = self.cfg.GetMasterNode()
3815     if node.name == masternode:
3816       raise errors.OpPrereqError("Node is the master node,"
3817                                  " you need to failover first.",
3818                                  errors.ECODE_INVAL)
3819
3820     for instance_name in instance_list:
3821       instance = self.cfg.GetInstanceInfo(instance_name)
3822       if node.name in instance.all_nodes:
3823         raise errors.OpPrereqError("Instance %s is still running on the node,"
3824                                    " please remove first." % instance_name,
3825                                    errors.ECODE_INVAL)
3826     self.op.node_name = node.name
3827     self.node = node
3828
3829   def Exec(self, feedback_fn):
3830     """Removes the node from the cluster.
3831
3832     """
3833     node = self.node
3834     logging.info("Stopping the node daemon and removing configs from node %s",
3835                  node.name)
3836
3837     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
3838
3839     # Promote nodes to master candidate as needed
3840     _AdjustCandidatePool(self, exceptions=[node.name])
3841     self.context.RemoveNode(node.name)
3842
3843     # Run post hooks on the node before it's removed
3844     _RunPostHook(self, node.name)
3845
3846     result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
3847     msg = result.fail_msg
3848     if msg:
3849       self.LogWarning("Errors encountered on the remote node while leaving"
3850                       " the cluster: %s", msg)
3851
3852     # Remove node from our /etc/hosts
3853     if self.cfg.GetClusterInfo().modify_etc_hosts:
3854       master_node = self.cfg.GetMasterNode()
3855       result = self.rpc.call_etc_hosts_modify(master_node,
3856                                               constants.ETC_HOSTS_REMOVE,
3857                                               node.name, None)
3858       result.Raise("Can't update hosts file with new host data")
3859       _RedistributeAncillaryFiles(self)
3860
3861
3862 class _NodeQuery(_QueryBase):
3863   FIELDS = query.NODE_FIELDS
3864
3865   def ExpandNames(self, lu):
3866     lu.needed_locks = {}
3867     lu.share_locks[locking.LEVEL_NODE] = 1
3868
3869     if self.names:
3870       self.wanted = _GetWantedNodes(lu, self.names)
3871     else:
3872       self.wanted = locking.ALL_SET
3873
3874     self.do_locking = (self.use_locking and
3875                        query.NQ_LIVE in self.requested_data)
3876
3877     if self.do_locking:
3878       # if we don't request only static fields, we need to lock the nodes
3879       lu.needed_locks[locking.LEVEL_NODE] = self.wanted
3880
3881   def DeclareLocks(self, lu, level):
3882     pass
3883
3884   def _GetQueryData(self, lu):
3885     """Computes the list of nodes and their attributes.
3886
3887     """
3888     all_info = lu.cfg.GetAllNodesInfo()
3889
3890     nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
3891
3892     # Gather data as requested
3893     if query.NQ_LIVE in self.requested_data:
3894       # filter out non-vm_capable nodes
3895       toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
3896
3897       node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
3898                                         lu.cfg.GetHypervisorType())
3899       live_data = dict((name, nresult.payload)
3900                        for (name, nresult) in node_data.items()
3901                        if not nresult.fail_msg and nresult.payload)
3902     else:
3903       live_data = None
3904
3905     if query.NQ_INST in self.requested_data:
3906       node_to_primary = dict([(name, set()) for name in nodenames])
3907       node_to_secondary = dict([(name, set()) for name in nodenames])
3908
3909       inst_data = lu.cfg.GetAllInstancesInfo()
3910
3911       for inst in inst_data.values():
3912         if inst.primary_node in node_to_primary:
3913           node_to_primary[inst.primary_node].add(inst.name)
3914         for secnode in inst.secondary_nodes:
3915           if secnode in node_to_secondary:
3916             node_to_secondary[secnode].add(inst.name)
3917     else:
3918       node_to_primary = None
3919       node_to_secondary = None
3920
3921     if query.NQ_OOB in self.requested_data:
3922       oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
3923                          for name, node in all_info.iteritems())
3924     else:
3925       oob_support = None
3926
3927     if query.NQ_GROUP in self.requested_data:
3928       groups = lu.cfg.GetAllNodeGroupsInfo()
3929     else:
3930       groups = {}
3931
3932     return query.NodeQueryData([all_info[name] for name in nodenames],
3933                                live_data, lu.cfg.GetMasterNode(),
3934                                node_to_primary, node_to_secondary, groups,
3935                                oob_support, lu.cfg.GetClusterInfo())
3936
3937
3938 class LUNodeQuery(NoHooksLU):
3939   """Logical unit for querying nodes.
3940
3941   """
3942   # pylint: disable-msg=W0142
3943   REQ_BGL = False
3944
3945   def CheckArguments(self):
3946     self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
3947                          self.op.output_fields, self.op.use_locking)
3948
3949   def ExpandNames(self):
3950     self.nq.ExpandNames(self)
3951
3952   def Exec(self, feedback_fn):
3953     return self.nq.OldStyleQuery(self)
3954
3955
3956 class LUNodeQueryvols(NoHooksLU):
3957   """Logical unit for getting volumes on node(s).
3958
3959   """
3960   REQ_BGL = False
3961   _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
3962   _FIELDS_STATIC = utils.FieldSet("node")
3963
3964   def CheckArguments(self):
3965     _CheckOutputFields(static=self._FIELDS_STATIC,
3966                        dynamic=self._FIELDS_DYNAMIC,
3967                        selected=self.op.output_fields)
3968
3969   def ExpandNames(self):
3970     self.needed_locks = {}
3971     self.share_locks[locking.LEVEL_NODE] = 1
3972     if not self.op.nodes:
3973       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
3974     else:
3975       self.needed_locks[locking.LEVEL_NODE] = \
3976         _GetWantedNodes(self, self.op.nodes)
3977
3978   def Exec(self, feedback_fn):
3979     """Computes the list of nodes and their attributes.
3980
3981     """
3982     nodenames = self.acquired_locks[locking.LEVEL_NODE]
3983     volumes = self.rpc.call_node_volumes(nodenames)
3984
3985     ilist = [self.cfg.GetInstanceInfo(iname) for iname
3986              in self.cfg.GetInstanceList()]
3987
3988     lv_by_node = dict([(inst, inst.MapLVsByNode()) for inst in ilist])
3989
3990     output = []
3991     for node in nodenames:
3992       nresult = volumes[node]
3993       if nresult.offline:
3994         continue
3995       msg = nresult.fail_msg
3996       if msg:
3997         self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
3998         continue
3999
4000       node_vols = nresult.payload[:]
4001       node_vols.sort(key=lambda vol: vol['dev'])
4002
4003       for vol in node_vols:
4004         node_output = []
4005         for field in self.op.output_fields:
4006           if field == "node":
4007             val = node
4008           elif field == "phys":
4009             val = vol['dev']
4010           elif field == "vg":
4011             val = vol['vg']
4012           elif field == "name":
4013             val = vol['name']
4014           elif field == "size":
4015             val = int(float(vol['size']))
4016           elif field == "instance":
4017             for inst in ilist:
4018               if node not in lv_by_node[inst]:
4019                 continue
4020               if vol['name'] in lv_by_node[inst][node]:
4021                 val = inst.name
4022                 break
4023             else:
4024               val = '-'
4025           else:
4026             raise errors.ParameterError(field)
4027           node_output.append(str(val))
4028
4029         output.append(node_output)
4030
4031     return output
4032
4033
4034 class LUNodeQueryStorage(NoHooksLU):
4035   """Logical unit for getting information on storage units on node(s).
4036
4037   """
4038   _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4039   REQ_BGL = False
4040
4041   def CheckArguments(self):
4042     _CheckOutputFields(static=self._FIELDS_STATIC,
4043                        dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4044                        selected=self.op.output_fields)
4045
4046   def ExpandNames(self):
4047     self.needed_locks = {}
4048     self.share_locks[locking.LEVEL_NODE] = 1
4049
4050     if self.op.nodes:
4051       self.needed_locks[locking.LEVEL_NODE] = \
4052         _GetWantedNodes(self, self.op.nodes)
4053     else:
4054       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4055
4056   def Exec(self, feedback_fn):
4057     """Computes the list of nodes and their attributes.
4058
4059     """
4060     self.nodes = self.acquired_locks[locking.LEVEL_NODE]
4061
4062     # Always get name to sort by
4063     if constants.SF_NAME in self.op.output_fields:
4064       fields = self.op.output_fields[:]
4065     else:
4066       fields = [constants.SF_NAME] + self.op.output_fields
4067
4068     # Never ask for node or type as it's only known to the LU
4069     for extra in [constants.SF_NODE, constants.SF_TYPE]:
4070       while extra in fields:
4071         fields.remove(extra)
4072
4073     field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4074     name_idx = field_idx[constants.SF_NAME]
4075
4076     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4077     data = self.rpc.call_storage_list(self.nodes,
4078                                       self.op.storage_type, st_args,
4079                                       self.op.name, fields)
4080
4081     result = []
4082
4083     for node in utils.NiceSort(self.nodes):
4084       nresult = data[node]
4085       if nresult.offline:
4086         continue
4087
4088       msg = nresult.fail_msg
4089       if msg:
4090         self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4091         continue
4092
4093       rows = dict([(row[name_idx], row) for row in nresult.payload])
4094
4095       for name in utils.NiceSort(rows.keys()):
4096         row = rows[name]
4097
4098         out = []
4099
4100         for field in self.op.output_fields:
4101           if field == constants.SF_NODE:
4102             val = node
4103           elif field == constants.SF_TYPE:
4104             val = self.op.storage_type
4105           elif field in field_idx:
4106             val = row[field_idx[field]]
4107           else:
4108             raise errors.ParameterError(field)
4109
4110           out.append(val)
4111
4112         result.append(out)
4113
4114     return result
4115
4116
4117 class _InstanceQuery(_QueryBase):
4118   FIELDS = query.INSTANCE_FIELDS
4119
4120   def ExpandNames(self, lu):
4121     lu.needed_locks = {}
4122     lu.share_locks[locking.LEVEL_INSTANCE] = 1
4123     lu.share_locks[locking.LEVEL_NODE] = 1
4124
4125     if self.names:
4126       self.wanted = _GetWantedInstances(lu, self.names)
4127     else:
4128       self.wanted = locking.ALL_SET
4129
4130     self.do_locking = (self.use_locking and
4131                        query.IQ_LIVE in self.requested_data)
4132     if self.do_locking:
4133       lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4134       lu.needed_locks[locking.LEVEL_NODE] = []
4135       lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4136
4137   def DeclareLocks(self, lu, level):
4138     if level == locking.LEVEL_NODE and self.do_locking:
4139       lu._LockInstancesNodes() # pylint: disable-msg=W0212
4140
4141   def _GetQueryData(self, lu):
4142     """Computes the list of instances and their attributes.
4143
4144     """
4145     cluster = lu.cfg.GetClusterInfo()
4146     all_info = lu.cfg.GetAllInstancesInfo()
4147
4148     instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4149
4150     instance_list = [all_info[name] for name in instance_names]
4151     nodes = frozenset(itertools.chain(*(inst.all_nodes
4152                                         for inst in instance_list)))
4153     hv_list = list(set([inst.hypervisor for inst in instance_list]))
4154     bad_nodes = []
4155     offline_nodes = []
4156     wrongnode_inst = set()
4157
4158     # Gather data as requested
4159     if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4160       live_data = {}
4161       node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4162       for name in nodes:
4163         result = node_data[name]
4164         if result.offline:
4165           # offline nodes will be in both lists
4166           assert result.fail_msg
4167           offline_nodes.append(name)
4168         if result.fail_msg:
4169           bad_nodes.append(name)
4170         elif result.payload:
4171           for inst in result.payload:
4172             if inst in all_info:
4173               if all_info[inst].primary_node == name:
4174                 live_data.update(result.payload)
4175               else:
4176                 wrongnode_inst.add(inst)
4177             else:
4178               # orphan instance; we don't list it here as we don't
4179               # handle this case yet in the output of instance listing
4180               logging.warning("Orphan instance '%s' found on node %s",
4181                               inst, name)
4182         # else no instance is alive
4183     else:
4184       live_data = {}
4185
4186     if query.IQ_DISKUSAGE in self.requested_data:
4187       disk_usage = dict((inst.name,
4188                          _ComputeDiskSize(inst.disk_template,
4189                                           [{constants.IDISK_SIZE: disk.size}
4190                                            for disk in inst.disks]))
4191                         for inst in instance_list)
4192     else:
4193       disk_usage = None
4194
4195     if query.IQ_CONSOLE in self.requested_data:
4196       consinfo = {}
4197       for inst in instance_list:
4198         if inst.name in live_data:
4199           # Instance is running
4200           consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4201         else:
4202           consinfo[inst.name] = None
4203       assert set(consinfo.keys()) == set(instance_names)
4204     else:
4205       consinfo = None
4206
4207     return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4208                                    disk_usage, offline_nodes, bad_nodes,
4209                                    live_data, wrongnode_inst, consinfo)
4210
4211
4212 class LUQuery(NoHooksLU):
4213   """Query for resources/items of a certain kind.
4214
4215   """
4216   # pylint: disable-msg=W0142
4217   REQ_BGL = False
4218
4219   def CheckArguments(self):
4220     qcls = _GetQueryImplementation(self.op.what)
4221
4222     self.impl = qcls(self.op.filter, self.op.fields, False)
4223
4224   def ExpandNames(self):
4225     self.impl.ExpandNames(self)
4226
4227   def DeclareLocks(self, level):
4228     self.impl.DeclareLocks(self, level)
4229
4230   def Exec(self, feedback_fn):
4231     return self.impl.NewStyleQuery(self)
4232
4233
4234 class LUQueryFields(NoHooksLU):
4235   """Query for resources/items of a certain kind.
4236
4237   """
4238   # pylint: disable-msg=W0142
4239   REQ_BGL = False
4240
4241   def CheckArguments(self):
4242     self.qcls = _GetQueryImplementation(self.op.what)
4243
4244   def ExpandNames(self):
4245     self.needed_locks = {}
4246
4247   def Exec(self, feedback_fn):
4248     return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4249
4250
4251 class LUNodeModifyStorage(NoHooksLU):
4252   """Logical unit for modifying a storage volume on a node.
4253
4254   """
4255   REQ_BGL = False
4256
4257   def CheckArguments(self):
4258     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4259
4260     storage_type = self.op.storage_type
4261
4262     try:
4263       modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4264     except KeyError:
4265       raise errors.OpPrereqError("Storage units of type '%s' can not be"
4266                                  " modified" % storage_type,
4267                                  errors.ECODE_INVAL)
4268
4269     diff = set(self.op.changes.keys()) - modifiable
4270     if diff:
4271       raise errors.OpPrereqError("The following fields can not be modified for"
4272                                  " storage units of type '%s': %r" %
4273                                  (storage_type, list(diff)),
4274                                  errors.ECODE_INVAL)
4275
4276   def ExpandNames(self):
4277     self.needed_locks = {
4278       locking.LEVEL_NODE: self.op.node_name,
4279       }
4280
4281   def Exec(self, feedback_fn):
4282     """Computes the list of nodes and their attributes.
4283
4284     """
4285     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4286     result = self.rpc.call_storage_modify(self.op.node_name,
4287                                           self.op.storage_type, st_args,
4288                                           self.op.name, self.op.changes)
4289     result.Raise("Failed to modify storage unit '%s' on %s" %
4290                  (self.op.name, self.op.node_name))
4291
4292
4293 class LUNodeAdd(LogicalUnit):
4294   """Logical unit for adding node to the cluster.
4295
4296   """
4297   HPATH = "node-add"
4298   HTYPE = constants.HTYPE_NODE
4299   _NFLAGS = ["master_capable", "vm_capable"]
4300
4301   def CheckArguments(self):
4302     self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4303     # validate/normalize the node name
4304     self.hostname = netutils.GetHostname(name=self.op.node_name,
4305                                          family=self.primary_ip_family)
4306     self.op.node_name = self.hostname.name
4307
4308     if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4309       raise errors.OpPrereqError("Cannot readd the master node",
4310                                  errors.ECODE_STATE)
4311
4312     if self.op.readd and self.op.group:
4313       raise errors.OpPrereqError("Cannot pass a node group when a node is"
4314                                  " being readded", errors.ECODE_INVAL)
4315
4316   def BuildHooksEnv(self):
4317     """Build hooks env.
4318
4319     This will run on all nodes before, and on all nodes + the new node after.
4320
4321     """
4322     return {
4323       "OP_TARGET": self.op.node_name,
4324       "NODE_NAME": self.op.node_name,
4325       "NODE_PIP": self.op.primary_ip,
4326       "NODE_SIP": self.op.secondary_ip,
4327       "MASTER_CAPABLE": str(self.op.master_capable),
4328       "VM_CAPABLE": str(self.op.vm_capable),
4329       }
4330
4331   def BuildHooksNodes(self):
4332     """Build hooks nodes.
4333
4334     """
4335     # Exclude added node
4336     pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4337     post_nodes = pre_nodes + [self.op.node_name, ]
4338
4339     return (pre_nodes, post_nodes)
4340
4341   def CheckPrereq(self):
4342     """Check prerequisites.
4343
4344     This checks:
4345      - the new node is not already in the config
4346      - it is resolvable
4347      - its parameters (single/dual homed) matches the cluster
4348
4349     Any errors are signaled by raising errors.OpPrereqError.
4350
4351     """
4352     cfg = self.cfg
4353     hostname = self.hostname
4354     node = hostname.name
4355     primary_ip = self.op.primary_ip = hostname.ip
4356     if self.op.secondary_ip is None:
4357       if self.primary_ip_family == netutils.IP6Address.family:
4358         raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4359                                    " IPv4 address must be given as secondary",
4360                                    errors.ECODE_INVAL)
4361       self.op.secondary_ip = primary_ip
4362
4363     secondary_ip = self.op.secondary_ip
4364     if not netutils.IP4Address.IsValid(secondary_ip):
4365       raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4366                                  " address" % secondary_ip, errors.ECODE_INVAL)
4367
4368     node_list = cfg.GetNodeList()
4369     if not self.op.readd and node in node_list:
4370       raise errors.OpPrereqError("Node %s is already in the configuration" %
4371                                  node, errors.ECODE_EXISTS)
4372     elif self.op.readd and node not in node_list:
4373       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4374                                  errors.ECODE_NOENT)
4375
4376     self.changed_primary_ip = False
4377
4378     for existing_node_name in node_list:
4379       existing_node = cfg.GetNodeInfo(existing_node_name)
4380
4381       if self.op.readd and node == existing_node_name:
4382         if existing_node.secondary_ip != secondary_ip:
4383           raise errors.OpPrereqError("Readded node doesn't have the same IP"
4384                                      " address configuration as before",
4385                                      errors.ECODE_INVAL)
4386         if existing_node.primary_ip != primary_ip:
4387           self.changed_primary_ip = True
4388
4389         continue
4390
4391       if (existing_node.primary_ip == primary_ip or
4392           existing_node.secondary_ip == primary_ip or
4393           existing_node.primary_ip == secondary_ip or
4394           existing_node.secondary_ip == secondary_ip):
4395         raise errors.OpPrereqError("New node ip address(es) conflict with"
4396                                    " existing node %s" % existing_node.name,
4397                                    errors.ECODE_NOTUNIQUE)
4398
4399     # After this 'if' block, None is no longer a valid value for the
4400     # _capable op attributes
4401     if self.op.readd:
4402       old_node = self.cfg.GetNodeInfo(node)
4403       assert old_node is not None, "Can't retrieve locked node %s" % node
4404       for attr in self._NFLAGS:
4405         if getattr(self.op, attr) is None:
4406           setattr(self.op, attr, getattr(old_node, attr))
4407     else:
4408       for attr in self._NFLAGS:
4409         if getattr(self.op, attr) is None:
4410           setattr(self.op, attr, True)
4411
4412     if self.op.readd and not self.op.vm_capable:
4413       pri, sec = cfg.GetNodeInstances(node)
4414       if pri or sec:
4415         raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4416                                    " flag set to false, but it already holds"
4417                                    " instances" % node,
4418                                    errors.ECODE_STATE)
4419
4420     # check that the type of the node (single versus dual homed) is the
4421     # same as for the master
4422     myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4423     master_singlehomed = myself.secondary_ip == myself.primary_ip
4424     newbie_singlehomed = secondary_ip == primary_ip
4425     if master_singlehomed != newbie_singlehomed:
4426       if master_singlehomed:
4427         raise errors.OpPrereqError("The master has no secondary ip but the"
4428                                    " new node has one",
4429                                    errors.ECODE_INVAL)
4430       else:
4431         raise errors.OpPrereqError("The master has a secondary ip but the"
4432                                    " new node doesn't have one",
4433                                    errors.ECODE_INVAL)
4434
4435     # checks reachability
4436     if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4437       raise errors.OpPrereqError("Node not reachable by ping",
4438                                  errors.ECODE_ENVIRON)
4439
4440     if not newbie_singlehomed:
4441       # check reachability from my secondary ip to newbie's secondary ip
4442       if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4443                            source=myself.secondary_ip):
4444         raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4445                                    " based ping to node daemon port",
4446                                    errors.ECODE_ENVIRON)
4447
4448     if self.op.readd:
4449       exceptions = [node]
4450     else:
4451       exceptions = []
4452
4453     if self.op.master_capable:
4454       self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4455     else:
4456       self.master_candidate = False
4457
4458     if self.op.readd:
4459       self.new_node = old_node
4460     else:
4461       node_group = cfg.LookupNodeGroup(self.op.group)
4462       self.new_node = objects.Node(name=node,
4463                                    primary_ip=primary_ip,
4464                                    secondary_ip=secondary_ip,
4465                                    master_candidate=self.master_candidate,
4466                                    offline=False, drained=False,
4467                                    group=node_group)
4468
4469     if self.op.ndparams:
4470       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4471
4472   def Exec(self, feedback_fn):
4473     """Adds the new node to the cluster.
4474
4475     """
4476     new_node = self.new_node
4477     node = new_node.name
4478
4479     # We adding a new node so we assume it's powered
4480     new_node.powered = True
4481
4482     # for re-adds, reset the offline/drained/master-candidate flags;
4483     # we need to reset here, otherwise offline would prevent RPC calls
4484     # later in the procedure; this also means that if the re-add
4485     # fails, we are left with a non-offlined, broken node
4486     if self.op.readd:
4487       new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4488       self.LogInfo("Readding a node, the offline/drained flags were reset")
4489       # if we demote the node, we do cleanup later in the procedure
4490       new_node.master_candidate = self.master_candidate
4491       if self.changed_primary_ip:
4492         new_node.primary_ip = self.op.primary_ip
4493
4494     # copy the master/vm_capable flags
4495     for attr in self._NFLAGS:
4496       setattr(new_node, attr, getattr(self.op, attr))
4497
4498     # notify the user about any possible mc promotion
4499     if new_node.master_candidate:
4500       self.LogInfo("Node will be a master candidate")
4501
4502     if self.op.ndparams:
4503       new_node.ndparams = self.op.ndparams
4504     else:
4505       new_node.ndparams = {}
4506
4507     # check connectivity
4508     result = self.rpc.call_version([node])[node]
4509     result.Raise("Can't get version information from node %s" % node)
4510     if constants.PROTOCOL_VERSION == result.payload:
4511       logging.info("Communication to node %s fine, sw version %s match",
4512                    node, result.payload)
4513     else:
4514       raise errors.OpExecError("Version mismatch master version %s,"
4515                                " node version %s" %
4516                                (constants.PROTOCOL_VERSION, result.payload))
4517
4518     # Add node to our /etc/hosts, and add key to known_hosts
4519     if self.cfg.GetClusterInfo().modify_etc_hosts:
4520       master_node = self.cfg.GetMasterNode()
4521       result = self.rpc.call_etc_hosts_modify(master_node,
4522                                               constants.ETC_HOSTS_ADD,
4523                                               self.hostname.name,
4524                                               self.hostname.ip)
4525       result.Raise("Can't update hosts file with new host data")
4526
4527     if new_node.secondary_ip != new_node.primary_ip:
4528       _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4529                                False)
4530
4531     node_verify_list = [self.cfg.GetMasterNode()]
4532     node_verify_param = {
4533       constants.NV_NODELIST: [node],
4534       # TODO: do a node-net-test as well?
4535     }
4536
4537     result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4538                                        self.cfg.GetClusterName())
4539     for verifier in node_verify_list:
4540       result[verifier].Raise("Cannot communicate with node %s" % verifier)
4541       nl_payload = result[verifier].payload[constants.NV_NODELIST]
4542       if nl_payload:
4543         for failed in nl_payload:
4544           feedback_fn("ssh/hostname verification failed"
4545                       " (checking from %s): %s" %
4546                       (verifier, nl_payload[failed]))
4547         raise errors.OpExecError("ssh/hostname verification failed")
4548
4549     if self.op.readd:
4550       _RedistributeAncillaryFiles(self)
4551       self.context.ReaddNode(new_node)
4552       # make sure we redistribute the config
4553       self.cfg.Update(new_node, feedback_fn)
4554       # and make sure the new node will not have old files around
4555       if not new_node.master_candidate:
4556         result = self.rpc.call_node_demote_from_mc(new_node.name)
4557         msg = result.fail_msg
4558         if msg:
4559           self.LogWarning("Node failed to demote itself from master"
4560                           " candidate status: %s" % msg)
4561     else:
4562       _RedistributeAncillaryFiles(self, additional_nodes=[node],
4563                                   additional_vm=self.op.vm_capable)
4564       self.context.AddNode(new_node, self.proc.GetECId())
4565
4566
4567 class LUNodeSetParams(LogicalUnit):
4568   """Modifies the parameters of a node.
4569
4570   @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
4571       to the node role (as _ROLE_*)
4572   @cvar _R2F: a dictionary from node role to tuples of flags
4573   @cvar _FLAGS: a list of attribute names corresponding to the flags
4574
4575   """
4576   HPATH = "node-modify"
4577   HTYPE = constants.HTYPE_NODE
4578   REQ_BGL = False
4579   (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
4580   _F2R = {
4581     (True, False, False): _ROLE_CANDIDATE,
4582     (False, True, False): _ROLE_DRAINED,
4583     (False, False, True): _ROLE_OFFLINE,
4584     (False, False, False): _ROLE_REGULAR,
4585     }
4586   _R2F = dict((v, k) for k, v in _F2R.items())
4587   _FLAGS = ["master_candidate", "drained", "offline"]
4588
4589   def CheckArguments(self):
4590     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4591     all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
4592                 self.op.master_capable, self.op.vm_capable,
4593                 self.op.secondary_ip, self.op.ndparams]
4594     if all_mods.count(None) == len(all_mods):
4595       raise errors.OpPrereqError("Please pass at least one modification",
4596                                  errors.ECODE_INVAL)
4597     if all_mods.count(True) > 1:
4598       raise errors.OpPrereqError("Can't set the node into more than one"
4599                                  " state at the same time",
4600                                  errors.ECODE_INVAL)
4601
4602     # Boolean value that tells us whether we might be demoting from MC
4603     self.might_demote = (self.op.master_candidate == False or
4604                          self.op.offline == True or
4605                          self.op.drained == True or
4606                          self.op.master_capable == False)
4607
4608     if self.op.secondary_ip:
4609       if not netutils.IP4Address.IsValid(self.op.secondary_ip):
4610         raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4611                                    " address" % self.op.secondary_ip,
4612                                    errors.ECODE_INVAL)
4613
4614     self.lock_all = self.op.auto_promote and self.might_demote
4615     self.lock_instances = self.op.secondary_ip is not None
4616
4617   def ExpandNames(self):
4618     if self.lock_all:
4619       self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
4620     else:
4621       self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
4622
4623     if self.lock_instances:
4624       self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
4625
4626   def DeclareLocks(self, level):
4627     # If we have locked all instances, before waiting to lock nodes, release
4628     # all the ones living on nodes unrelated to the current operation.
4629     if level == locking.LEVEL_NODE and self.lock_instances:
4630       self.affected_instances = []
4631       if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
4632         instances_keep = []
4633
4634         # Build list of instances to release
4635         for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
4636           instance = self.context.cfg.GetInstanceInfo(instance_name)
4637           if (instance.disk_template in constants.DTS_INT_MIRROR and
4638               self.op.node_name in instance.all_nodes):
4639             instances_keep.append(instance_name)
4640             self.affected_instances.append(instance)
4641
4642         _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
4643
4644         assert (set(self.acquired_locks.get(locking.LEVEL_INSTANCE, [])) ==
4645                 set(instances_keep))
4646
4647   def BuildHooksEnv(self):
4648     """Build hooks env.
4649
4650     This runs on the master node.
4651
4652     """
4653     return {
4654       "OP_TARGET": self.op.node_name,
4655       "MASTER_CANDIDATE": str(self.op.master_candidate),
4656       "OFFLINE": str(self.op.offline),
4657       "DRAINED": str(self.op.drained),
4658       "MASTER_CAPABLE": str(self.op.master_capable),
4659       "VM_CAPABLE": str(self.op.vm_capable),
4660       }
4661
4662   def BuildHooksNodes(self):
4663     """Build hooks nodes.
4664
4665     """
4666     nl = [self.cfg.GetMasterNode(), self.op.node_name]
4667     return (nl, nl)
4668
4669   def CheckPrereq(self):
4670     """Check prerequisites.
4671
4672     This only checks the instance list against the existing names.
4673
4674     """
4675     node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
4676
4677     if (self.op.master_candidate is not None or
4678         self.op.drained is not None or
4679         self.op.offline is not None):
4680       # we can't change the master's node flags
4681       if self.op.node_name == self.cfg.GetMasterNode():
4682         raise errors.OpPrereqError("The master role can be changed"
4683                                    " only via master-failover",
4684                                    errors.ECODE_INVAL)
4685
4686     if self.op.master_candidate and not node.master_capable:
4687       raise errors.OpPrereqError("Node %s is not master capable, cannot make"
4688                                  " it a master candidate" % node.name,
4689                                  errors.ECODE_STATE)
4690
4691     if self.op.vm_capable == False:
4692       (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
4693       if ipri or isec:
4694         raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
4695                                    " the vm_capable flag" % node.name,
4696                                    errors.ECODE_STATE)
4697
4698     if node.master_candidate and self.might_demote and not self.lock_all:
4699       assert not self.op.auto_promote, "auto_promote set but lock_all not"
4700       # check if after removing the current node, we're missing master
4701       # candidates
4702       (mc_remaining, mc_should, _) = \
4703           self.cfg.GetMasterCandidateStats(exceptions=[node.name])
4704       if mc_remaining < mc_should:
4705         raise errors.OpPrereqError("Not enough master candidates, please"
4706                                    " pass auto promote option to allow"
4707                                    " promotion", errors.ECODE_STATE)
4708
4709     self.old_flags = old_flags = (node.master_candidate,
4710                                   node.drained, node.offline)
4711     assert old_flags in self._F2R, "Un-handled old flags  %s" % str(old_flags)
4712     self.old_role = old_role = self._F2R[old_flags]
4713
4714     # Check for ineffective changes
4715     for attr in self._FLAGS:
4716       if (getattr(self.op, attr) == False and getattr(node, attr) == False):
4717         self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
4718         setattr(self.op, attr, None)
4719
4720     # Past this point, any flag change to False means a transition
4721     # away from the respective state, as only real changes are kept
4722
4723     # TODO: We might query the real power state if it supports OOB
4724     if _SupportsOob(self.cfg, node):
4725       if self.op.offline is False and not (node.powered or
4726                                            self.op.powered == True):
4727         raise errors.OpPrereqError(("Please power on node %s first before you"
4728                                     " can reset offline state") %
4729                                    self.op.node_name)
4730     elif self.op.powered is not None:
4731       raise errors.OpPrereqError(("Unable to change powered state for node %s"
4732                                   " which does not support out-of-band"
4733                                   " handling") % self.op.node_name)
4734
4735     # If we're being deofflined/drained, we'll MC ourself if needed
4736     if (self.op.drained == False or self.op.offline == False or
4737         (self.op.master_capable and not node.master_capable)):
4738       if _DecideSelfPromotion(self):
4739         self.op.master_candidate = True
4740         self.LogInfo("Auto-promoting node to master candidate")
4741
4742     # If we're no longer master capable, we'll demote ourselves from MC
4743     if self.op.master_capable == False and node.master_candidate:
4744       self.LogInfo("Demoting from master candidate")
4745       self.op.master_candidate = False
4746
4747     # Compute new role
4748     assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
4749     if self.op.master_candidate:
4750       new_role = self._ROLE_CANDIDATE
4751     elif self.op.drained:
4752       new_role = self._ROLE_DRAINED
4753     elif self.op.offline:
4754       new_role = self._ROLE_OFFLINE
4755     elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
4756       # False is still in new flags, which means we're un-setting (the
4757       # only) True flag
4758       new_role = self._ROLE_REGULAR
4759     else: # no new flags, nothing, keep old role
4760       new_role = old_role
4761
4762     self.new_role = new_role
4763
4764     if old_role == self._ROLE_OFFLINE and new_role != old_role:
4765       # Trying to transition out of offline status
4766       result = self.rpc.call_version([node.name])[node.name]
4767       if result.fail_msg:
4768         raise errors.OpPrereqError("Node %s is being de-offlined but fails"
4769                                    " to report its version: %s" %
4770                                    (node.name, result.fail_msg),
4771                                    errors.ECODE_STATE)
4772       else:
4773         self.LogWarning("Transitioning node from offline to online state"
4774                         " without using re-add. Please make sure the node"
4775                         " is healthy!")
4776
4777     if self.op.secondary_ip:
4778       # Ok even without locking, because this can't be changed by any LU
4779       master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
4780       master_singlehomed = master.secondary_ip == master.primary_ip
4781       if master_singlehomed and self.op.secondary_ip:
4782         raise errors.OpPrereqError("Cannot change the secondary ip on a single"
4783                                    " homed cluster", errors.ECODE_INVAL)
4784
4785       if node.offline:
4786         if self.affected_instances:
4787           raise errors.OpPrereqError("Cannot change secondary ip: offline"
4788                                      " node has instances (%s) configured"
4789                                      " to use it" % self.affected_instances)
4790       else:
4791         # On online nodes, check that no instances are running, and that
4792         # the node has the new ip and we can reach it.
4793         for instance in self.affected_instances:
4794           _CheckInstanceDown(self, instance, "cannot change secondary ip")
4795
4796         _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
4797         if master.name != node.name:
4798           # check reachability from master secondary ip to new secondary ip
4799           if not netutils.TcpPing(self.op.secondary_ip,
4800                                   constants.DEFAULT_NODED_PORT,
4801                                   source=master.secondary_ip):
4802             raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4803                                        " based ping to node daemon port",
4804                                        errors.ECODE_ENVIRON)
4805
4806     if self.op.ndparams:
4807       new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
4808       utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
4809       self.new_ndparams = new_ndparams
4810
4811   def Exec(self, feedback_fn):
4812     """Modifies a node.
4813
4814     """
4815     node = self.node
4816     old_role = self.old_role
4817     new_role = self.new_role
4818
4819     result = []
4820
4821     if self.op.ndparams:
4822       node.ndparams = self.new_ndparams
4823
4824     if self.op.powered is not None:
4825       node.powered = self.op.powered
4826
4827     for attr in ["master_capable", "vm_capable"]:
4828       val = getattr(self.op, attr)
4829       if val is not None:
4830         setattr(node, attr, val)
4831         result.append((attr, str(val)))
4832
4833     if new_role != old_role:
4834       # Tell the node to demote itself, if no longer MC and not offline
4835       if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
4836         msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
4837         if msg:
4838           self.LogWarning("Node failed to demote itself: %s", msg)
4839
4840       new_flags = self._R2F[new_role]
4841       for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
4842         if of != nf:
4843           result.append((desc, str(nf)))
4844       (node.master_candidate, node.drained, node.offline) = new_flags
4845
4846       # we locked all nodes, we adjust the CP before updating this node
4847       if self.lock_all:
4848         _AdjustCandidatePool(self, [node.name])
4849
4850     if self.op.secondary_ip:
4851       node.secondary_ip = self.op.secondary_ip
4852       result.append(("secondary_ip", self.op.secondary_ip))
4853
4854     # this will trigger configuration file update, if needed
4855     self.cfg.Update(node, feedback_fn)
4856
4857     # this will trigger job queue propagation or cleanup if the mc
4858     # flag changed
4859     if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
4860       self.context.ReaddNode(node)
4861
4862     return result
4863
4864
4865 class LUNodePowercycle(NoHooksLU):
4866   """Powercycles a node.
4867
4868   """
4869   REQ_BGL = False
4870
4871   def CheckArguments(self):
4872     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4873     if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
4874       raise errors.OpPrereqError("The node is the master and the force"
4875                                  " parameter was not set",
4876                                  errors.ECODE_INVAL)
4877
4878   def ExpandNames(self):
4879     """Locking for PowercycleNode.
4880
4881     This is a last-resort option and shouldn't block on other
4882     jobs. Therefore, we grab no locks.
4883
4884     """
4885     self.needed_locks = {}
4886
4887   def Exec(self, feedback_fn):
4888     """Reboots a node.
4889
4890     """
4891     result = self.rpc.call_node_powercycle(self.op.node_name,
4892                                            self.cfg.GetHypervisorType())
4893     result.Raise("Failed to schedule the reboot")
4894     return result.payload
4895
4896
4897 class LUClusterQuery(NoHooksLU):
4898   """Query cluster configuration.
4899
4900   """
4901   REQ_BGL = False
4902
4903   def ExpandNames(self):
4904     self.needed_locks = {}
4905
4906   def Exec(self, feedback_fn):
4907     """Return cluster config.
4908
4909     """
4910     cluster = self.cfg.GetClusterInfo()
4911     os_hvp = {}
4912
4913     # Filter just for enabled hypervisors
4914     for os_name, hv_dict in cluster.os_hvp.items():
4915       os_hvp[os_name] = {}
4916       for hv_name, hv_params in hv_dict.items():
4917         if hv_name in cluster.enabled_hypervisors:
4918           os_hvp[os_name][hv_name] = hv_params
4919
4920     # Convert ip_family to ip_version
4921     primary_ip_version = constants.IP4_VERSION
4922     if cluster.primary_ip_family == netutils.IP6Address.family:
4923       primary_ip_version = constants.IP6_VERSION
4924
4925     result = {
4926       "software_version": constants.RELEASE_VERSION,
4927       "protocol_version": constants.PROTOCOL_VERSION,
4928       "config_version": constants.CONFIG_VERSION,
4929       "os_api_version": max(constants.OS_API_VERSIONS),
4930       "export_version": constants.EXPORT_VERSION,
4931       "architecture": (platform.architecture()[0], platform.machine()),
4932       "name": cluster.cluster_name,
4933       "master": cluster.master_node,
4934       "default_hypervisor": cluster.enabled_hypervisors[0],
4935       "enabled_hypervisors": cluster.enabled_hypervisors,
4936       "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
4937                         for hypervisor_name in cluster.enabled_hypervisors]),
4938       "os_hvp": os_hvp,
4939       "beparams": cluster.beparams,
4940       "osparams": cluster.osparams,
4941       "nicparams": cluster.nicparams,
4942       "ndparams": cluster.ndparams,
4943       "candidate_pool_size": cluster.candidate_pool_size,
4944       "master_netdev": cluster.master_netdev,
4945       "volume_group_name": cluster.volume_group_name,
4946       "drbd_usermode_helper": cluster.drbd_usermode_helper,
4947       "file_storage_dir": cluster.file_storage_dir,
4948       "shared_file_storage_dir": cluster.shared_file_storage_dir,
4949       "maintain_node_health": cluster.maintain_node_health,
4950       "ctime": cluster.ctime,
4951       "mtime": cluster.mtime,
4952       "uuid": cluster.uuid,
4953       "tags": list(cluster.GetTags()),
4954       "uid_pool": cluster.uid_pool,
4955       "default_iallocator": cluster.default_iallocator,
4956       "reserved_lvs": cluster.reserved_lvs,
4957       "primary_ip_version": primary_ip_version,
4958       "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
4959       "hidden_os": cluster.hidden_os,
4960       "blacklisted_os": cluster.blacklisted_os,
4961       }
4962
4963     return result
4964
4965
4966 class LUClusterConfigQuery(NoHooksLU):
4967   """Return configuration values.
4968
4969   """
4970   REQ_BGL = False
4971   _FIELDS_DYNAMIC = utils.FieldSet()
4972   _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
4973                                   "watcher_pause", "volume_group_name")
4974
4975   def CheckArguments(self):
4976     _CheckOutputFields(static=self._FIELDS_STATIC,
4977                        dynamic=self._FIELDS_DYNAMIC,
4978                        selected=self.op.output_fields)
4979
4980   def ExpandNames(self):
4981     self.needed_locks = {}
4982
4983   def Exec(self, feedback_fn):
4984     """Dump a representation of the cluster config to the standard output.
4985
4986     """
4987     values = []
4988     for field in self.op.output_fields:
4989       if field == "cluster_name":
4990         entry = self.cfg.GetClusterName()
4991       elif field == "master_node":
4992         entry = self.cfg.GetMasterNode()
4993       elif field == "drain_flag":
4994         entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
4995       elif field == "watcher_pause":
4996         entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
4997       elif field == "volume_group_name":
4998         entry = self.cfg.GetVGName()
4999       else:
5000         raise errors.ParameterError(field)
5001       values.append(entry)
5002     return values
5003
5004
5005 class LUInstanceActivateDisks(NoHooksLU):
5006   """Bring up an instance's disks.
5007
5008   """
5009   REQ_BGL = False
5010
5011   def ExpandNames(self):
5012     self._ExpandAndLockInstance()
5013     self.needed_locks[locking.LEVEL_NODE] = []
5014     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5015
5016   def DeclareLocks(self, level):
5017     if level == locking.LEVEL_NODE:
5018       self._LockInstancesNodes()
5019
5020   def CheckPrereq(self):
5021     """Check prerequisites.
5022
5023     This checks that the instance is in the cluster.
5024
5025     """
5026     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5027     assert self.instance is not None, \
5028       "Cannot retrieve locked instance %s" % self.op.instance_name
5029     _CheckNodeOnline(self, self.instance.primary_node)
5030
5031   def Exec(self, feedback_fn):
5032     """Activate the disks.
5033
5034     """
5035     disks_ok, disks_info = \
5036               _AssembleInstanceDisks(self, self.instance,
5037                                      ignore_size=self.op.ignore_size)
5038     if not disks_ok:
5039       raise errors.OpExecError("Cannot activate block devices")
5040
5041     return disks_info
5042
5043
5044 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5045                            ignore_size=False):
5046   """Prepare the block devices for an instance.
5047
5048   This sets up the block devices on all nodes.
5049
5050   @type lu: L{LogicalUnit}
5051   @param lu: the logical unit on whose behalf we execute
5052   @type instance: L{objects.Instance}
5053   @param instance: the instance for whose disks we assemble
5054   @type disks: list of L{objects.Disk} or None
5055   @param disks: which disks to assemble (or all, if None)
5056   @type ignore_secondaries: boolean
5057   @param ignore_secondaries: if true, errors on secondary nodes
5058       won't result in an error return from the function
5059   @type ignore_size: boolean
5060   @param ignore_size: if true, the current known size of the disk
5061       will not be used during the disk activation, useful for cases
5062       when the size is wrong
5063   @return: False if the operation failed, otherwise a list of
5064       (host, instance_visible_name, node_visible_name)
5065       with the mapping from node devices to instance devices
5066
5067   """
5068   device_info = []
5069   disks_ok = True
5070   iname = instance.name
5071   disks = _ExpandCheckDisks(instance, disks)
5072
5073   # With the two passes mechanism we try to reduce the window of
5074   # opportunity for the race condition of switching DRBD to primary
5075   # before handshaking occured, but we do not eliminate it
5076
5077   # The proper fix would be to wait (with some limits) until the
5078   # connection has been made and drbd transitions from WFConnection
5079   # into any other network-connected state (Connected, SyncTarget,
5080   # SyncSource, etc.)
5081
5082   # 1st pass, assemble on all nodes in secondary mode
5083   for idx, inst_disk in enumerate(disks):
5084     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5085       if ignore_size:
5086         node_disk = node_disk.Copy()
5087         node_disk.UnsetSize()
5088       lu.cfg.SetDiskID(node_disk, node)
5089       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5090       msg = result.fail_msg
5091       if msg:
5092         lu.proc.LogWarning("Could not prepare block device %s on node %s"
5093                            " (is_primary=False, pass=1): %s",
5094                            inst_disk.iv_name, node, msg)
5095         if not ignore_secondaries:
5096           disks_ok = False
5097
5098   # FIXME: race condition on drbd migration to primary
5099
5100   # 2nd pass, do only the primary node
5101   for idx, inst_disk in enumerate(disks):
5102     dev_path = None
5103
5104     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5105       if node != instance.primary_node:
5106         continue
5107       if ignore_size:
5108         node_disk = node_disk.Copy()
5109         node_disk.UnsetSize()
5110       lu.cfg.SetDiskID(node_disk, node)
5111       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5112       msg = result.fail_msg
5113       if msg:
5114         lu.proc.LogWarning("Could not prepare block device %s on node %s"
5115                            " (is_primary=True, pass=2): %s",
5116                            inst_disk.iv_name, node, msg)
5117         disks_ok = False
5118       else:
5119         dev_path = result.payload
5120
5121     device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5122
5123   # leave the disks configured for the primary node
5124   # this is a workaround that would be fixed better by
5125   # improving the logical/physical id handling
5126   for disk in disks:
5127     lu.cfg.SetDiskID(disk, instance.primary_node)
5128
5129   return disks_ok, device_info
5130
5131
5132 def _StartInstanceDisks(lu, instance, force):
5133   """Start the disks of an instance.
5134
5135   """
5136   disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5137                                            ignore_secondaries=force)
5138   if not disks_ok:
5139     _ShutdownInstanceDisks(lu, instance)
5140     if force is not None and not force:
5141       lu.proc.LogWarning("", hint="If the message above refers to a"
5142                          " secondary node,"
5143                          " you can retry the operation using '--force'.")
5144     raise errors.OpExecError("Disk consistency error")
5145
5146
5147 class LUInstanceDeactivateDisks(NoHooksLU):
5148   """Shutdown an instance's disks.
5149
5150   """
5151   REQ_BGL = False
5152
5153   def ExpandNames(self):
5154     self._ExpandAndLockInstance()
5155     self.needed_locks[locking.LEVEL_NODE] = []
5156     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5157
5158   def DeclareLocks(self, level):
5159     if level == locking.LEVEL_NODE:
5160       self._LockInstancesNodes()
5161
5162   def CheckPrereq(self):
5163     """Check prerequisites.
5164
5165     This checks that the instance is in the cluster.
5166
5167     """
5168     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5169     assert self.instance is not None, \
5170       "Cannot retrieve locked instance %s" % self.op.instance_name
5171
5172   def Exec(self, feedback_fn):
5173     """Deactivate the disks
5174
5175     """
5176     instance = self.instance
5177     if self.op.force:
5178       _ShutdownInstanceDisks(self, instance)
5179     else:
5180       _SafeShutdownInstanceDisks(self, instance)
5181
5182
5183 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5184   """Shutdown block devices of an instance.
5185
5186   This function checks if an instance is running, before calling
5187   _ShutdownInstanceDisks.
5188
5189   """
5190   _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5191   _ShutdownInstanceDisks(lu, instance, disks=disks)
5192
5193
5194 def _ExpandCheckDisks(instance, disks):
5195   """Return the instance disks selected by the disks list
5196
5197   @type disks: list of L{objects.Disk} or None
5198   @param disks: selected disks
5199   @rtype: list of L{objects.Disk}
5200   @return: selected instance disks to act on
5201
5202   """
5203   if disks is None:
5204     return instance.disks
5205   else:
5206     if not set(disks).issubset(instance.disks):
5207       raise errors.ProgrammerError("Can only act on disks belonging to the"
5208                                    " target instance")
5209     return disks
5210
5211
5212 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5213   """Shutdown block devices of an instance.
5214
5215   This does the shutdown on all nodes of the instance.
5216
5217   If the ignore_primary is false, errors on the primary node are
5218   ignored.
5219
5220   """
5221   all_result = True
5222   disks = _ExpandCheckDisks(instance, disks)
5223
5224   for disk in disks:
5225     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5226       lu.cfg.SetDiskID(top_disk, node)
5227       result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5228       msg = result.fail_msg
5229       if msg:
5230         lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5231                       disk.iv_name, node, msg)
5232         if ((node == instance.primary_node and not ignore_primary) or
5233             (node != instance.primary_node and not result.offline)):
5234           all_result = False
5235   return all_result
5236
5237
5238 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5239   """Checks if a node has enough free memory.
5240
5241   This function check if a given node has the needed amount of free
5242   memory. In case the node has less memory or we cannot get the
5243   information from the node, this function raise an OpPrereqError
5244   exception.
5245
5246   @type lu: C{LogicalUnit}
5247   @param lu: a logical unit from which we get configuration data
5248   @type node: C{str}
5249   @param node: the node to check
5250   @type reason: C{str}
5251   @param reason: string to use in the error message
5252   @type requested: C{int}
5253   @param requested: the amount of memory in MiB to check for
5254   @type hypervisor_name: C{str}
5255   @param hypervisor_name: the hypervisor to ask for memory stats
5256   @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5257       we cannot check the node
5258
5259   """
5260   nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5261   nodeinfo[node].Raise("Can't get data from node %s" % node,
5262                        prereq=True, ecode=errors.ECODE_ENVIRON)
5263   free_mem = nodeinfo[node].payload.get('memory_free', None)
5264   if not isinstance(free_mem, int):
5265     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5266                                " was '%s'" % (node, free_mem),
5267                                errors.ECODE_ENVIRON)
5268   if requested > free_mem:
5269     raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5270                                " needed %s MiB, available %s MiB" %
5271                                (node, reason, requested, free_mem),
5272                                errors.ECODE_NORES)
5273
5274
5275 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5276   """Checks if nodes have enough free disk space in the all VGs.
5277
5278   This function check if all given nodes have the needed amount of
5279   free disk. In case any node has less disk or we cannot get the
5280   information from the node, this function raise an OpPrereqError
5281   exception.
5282
5283   @type lu: C{LogicalUnit}
5284   @param lu: a logical unit from which we get configuration data
5285   @type nodenames: C{list}
5286   @param nodenames: the list of node names to check
5287   @type req_sizes: C{dict}
5288   @param req_sizes: the hash of vg and corresponding amount of disk in
5289       MiB to check for
5290   @raise errors.OpPrereqError: if the node doesn't have enough disk,
5291       or we cannot check the node
5292
5293   """
5294   for vg, req_size in req_sizes.items():
5295     _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
5296
5297
5298 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5299   """Checks if nodes have enough free disk space in the specified VG.
5300
5301   This function check if all given nodes have the needed amount of
5302   free disk. In case any node has less disk or we cannot get the
5303   information from the node, this function raise an OpPrereqError
5304   exception.
5305
5306   @type lu: C{LogicalUnit}
5307   @param lu: a logical unit from which we get configuration data
5308   @type nodenames: C{list}
5309   @param nodenames: the list of node names to check
5310   @type vg: C{str}
5311   @param vg: the volume group to check
5312   @type requested: C{int}
5313   @param requested: the amount of disk in MiB to check for
5314   @raise errors.OpPrereqError: if the node doesn't have enough disk,
5315       or we cannot check the node
5316
5317   """
5318   nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5319   for node in nodenames:
5320     info = nodeinfo[node]
5321     info.Raise("Cannot get current information from node %s" % node,
5322                prereq=True, ecode=errors.ECODE_ENVIRON)
5323     vg_free = info.payload.get("vg_free", None)
5324     if not isinstance(vg_free, int):
5325       raise errors.OpPrereqError("Can't compute free disk space on node"
5326                                  " %s for vg %s, result was '%s'" %
5327                                  (node, vg, vg_free), errors.ECODE_ENVIRON)
5328     if requested > vg_free:
5329       raise errors.OpPrereqError("Not enough disk space on target node %s"
5330                                  " vg %s: required %d MiB, available %d MiB" %
5331                                  (node, vg, requested, vg_free),
5332                                  errors.ECODE_NORES)
5333
5334
5335 class LUInstanceStartup(LogicalUnit):
5336   """Starts an instance.
5337
5338   """
5339   HPATH = "instance-start"
5340   HTYPE = constants.HTYPE_INSTANCE
5341   REQ_BGL = False
5342
5343   def CheckArguments(self):
5344     # extra beparams
5345     if self.op.beparams:
5346       # fill the beparams dict
5347       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5348
5349   def ExpandNames(self):
5350     self._ExpandAndLockInstance()
5351
5352   def BuildHooksEnv(self):
5353     """Build hooks env.
5354
5355     This runs on master, primary and secondary nodes of the instance.
5356
5357     """
5358     env = {
5359       "FORCE": self.op.force,
5360       }
5361
5362     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5363
5364     return env
5365
5366   def BuildHooksNodes(self):
5367     """Build hooks nodes.
5368
5369     """
5370     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5371     return (nl, nl)
5372
5373   def CheckPrereq(self):
5374     """Check prerequisites.
5375
5376     This checks that the instance is in the cluster.
5377
5378     """
5379     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5380     assert self.instance is not None, \
5381       "Cannot retrieve locked instance %s" % self.op.instance_name
5382
5383     # extra hvparams
5384     if self.op.hvparams:
5385       # check hypervisor parameter syntax (locally)
5386       cluster = self.cfg.GetClusterInfo()
5387       utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5388       filled_hvp = cluster.FillHV(instance)
5389       filled_hvp.update(self.op.hvparams)
5390       hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5391       hv_type.CheckParameterSyntax(filled_hvp)
5392       _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5393
5394     self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5395
5396     if self.primary_offline and self.op.ignore_offline_nodes:
5397       self.proc.LogWarning("Ignoring offline primary node")
5398
5399       if self.op.hvparams or self.op.beparams:
5400         self.proc.LogWarning("Overridden parameters are ignored")
5401     else:
5402       _CheckNodeOnline(self, instance.primary_node)
5403
5404       bep = self.cfg.GetClusterInfo().FillBE(instance)
5405
5406       # check bridges existence
5407       _CheckInstanceBridgesExist(self, instance)
5408
5409       remote_info = self.rpc.call_instance_info(instance.primary_node,
5410                                                 instance.name,
5411                                                 instance.hypervisor)
5412       remote_info.Raise("Error checking node %s" % instance.primary_node,
5413                         prereq=True, ecode=errors.ECODE_ENVIRON)
5414       if not remote_info.payload: # not running already
5415         _CheckNodeFreeMemory(self, instance.primary_node,
5416                              "starting instance %s" % instance.name,
5417                              bep[constants.BE_MEMORY], instance.hypervisor)
5418
5419   def Exec(self, feedback_fn):
5420     """Start the instance.
5421
5422     """
5423     instance = self.instance
5424     force = self.op.force
5425
5426     self.cfg.MarkInstanceUp(instance.name)
5427
5428     if self.primary_offline:
5429       assert self.op.ignore_offline_nodes
5430       self.proc.LogInfo("Primary node offline, marked instance as started")
5431     else:
5432       node_current = instance.primary_node
5433
5434       _StartInstanceDisks(self, instance, force)
5435
5436       result = self.rpc.call_instance_start(node_current, instance,
5437                                             self.op.hvparams, self.op.beparams)
5438       msg = result.fail_msg
5439       if msg:
5440         _ShutdownInstanceDisks(self, instance)
5441         raise errors.OpExecError("Could not start instance: %s" % msg)
5442
5443
5444 class LUInstanceReboot(LogicalUnit):
5445   """Reboot an instance.
5446
5447   """
5448   HPATH = "instance-reboot"
5449   HTYPE = constants.HTYPE_INSTANCE
5450   REQ_BGL = False
5451
5452   def ExpandNames(self):
5453     self._ExpandAndLockInstance()
5454
5455   def BuildHooksEnv(self):
5456     """Build hooks env.
5457
5458     This runs on master, primary and secondary nodes of the instance.
5459
5460     """
5461     env = {
5462       "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5463       "REBOOT_TYPE": self.op.reboot_type,
5464       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5465       }
5466
5467     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5468
5469     return env
5470
5471   def BuildHooksNodes(self):
5472     """Build hooks nodes.
5473
5474     """
5475     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5476     return (nl, nl)
5477
5478   def CheckPrereq(self):
5479     """Check prerequisites.
5480
5481     This checks that the instance is in the cluster.
5482
5483     """
5484     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5485     assert self.instance is not None, \
5486       "Cannot retrieve locked instance %s" % self.op.instance_name
5487
5488     _CheckNodeOnline(self, instance.primary_node)
5489
5490     # check bridges existence
5491     _CheckInstanceBridgesExist(self, instance)
5492
5493   def Exec(self, feedback_fn):
5494     """Reboot the instance.
5495
5496     """
5497     instance = self.instance
5498     ignore_secondaries = self.op.ignore_secondaries
5499     reboot_type = self.op.reboot_type
5500
5501     remote_info = self.rpc.call_instance_info(instance.primary_node,
5502                                               instance.name,
5503                                               instance.hypervisor)
5504     remote_info.Raise("Error checking node %s" % instance.primary_node)
5505     instance_running = bool(remote_info.payload)
5506
5507     node_current = instance.primary_node
5508
5509     if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5510                                             constants.INSTANCE_REBOOT_HARD]:
5511       for disk in instance.disks:
5512         self.cfg.SetDiskID(disk, node_current)
5513       result = self.rpc.call_instance_reboot(node_current, instance,
5514                                              reboot_type,
5515                                              self.op.shutdown_timeout)
5516       result.Raise("Could not reboot instance")
5517     else:
5518       if instance_running:
5519         result = self.rpc.call_instance_shutdown(node_current, instance,
5520                                                  self.op.shutdown_timeout)
5521         result.Raise("Could not shutdown instance for full reboot")
5522         _ShutdownInstanceDisks(self, instance)
5523       else:
5524         self.LogInfo("Instance %s was already stopped, starting now",
5525                      instance.name)
5526       _StartInstanceDisks(self, instance, ignore_secondaries)
5527       result = self.rpc.call_instance_start(node_current, instance, None, None)
5528       msg = result.fail_msg
5529       if msg:
5530         _ShutdownInstanceDisks(self, instance)
5531         raise errors.OpExecError("Could not start instance for"
5532                                  " full reboot: %s" % msg)
5533
5534     self.cfg.MarkInstanceUp(instance.name)
5535
5536
5537 class LUInstanceShutdown(LogicalUnit):
5538   """Shutdown an instance.
5539
5540   """
5541   HPATH = "instance-stop"
5542   HTYPE = constants.HTYPE_INSTANCE
5543   REQ_BGL = False
5544
5545   def ExpandNames(self):
5546     self._ExpandAndLockInstance()
5547
5548   def BuildHooksEnv(self):
5549     """Build hooks env.
5550
5551     This runs on master, primary and secondary nodes of the instance.
5552
5553     """
5554     env = _BuildInstanceHookEnvByObject(self, self.instance)
5555     env["TIMEOUT"] = self.op.timeout
5556     return env
5557
5558   def BuildHooksNodes(self):
5559     """Build hooks nodes.
5560
5561     """
5562     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5563     return (nl, nl)
5564
5565   def CheckPrereq(self):
5566     """Check prerequisites.
5567
5568     This checks that the instance is in the cluster.
5569
5570     """
5571     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5572     assert self.instance is not None, \
5573       "Cannot retrieve locked instance %s" % self.op.instance_name
5574
5575     self.primary_offline = \
5576       self.cfg.GetNodeInfo(self.instance.primary_node).offline
5577
5578     if self.primary_offline and self.op.ignore_offline_nodes:
5579       self.proc.LogWarning("Ignoring offline primary node")
5580     else:
5581       _CheckNodeOnline(self, self.instance.primary_node)
5582
5583   def Exec(self, feedback_fn):
5584     """Shutdown the instance.
5585
5586     """
5587     instance = self.instance
5588     node_current = instance.primary_node
5589     timeout = self.op.timeout
5590
5591     self.cfg.MarkInstanceDown(instance.name)
5592
5593     if self.primary_offline:
5594       assert self.op.ignore_offline_nodes
5595       self.proc.LogInfo("Primary node offline, marked instance as stopped")
5596     else:
5597       result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
5598       msg = result.fail_msg
5599       if msg:
5600         self.proc.LogWarning("Could not shutdown instance: %s" % msg)
5601
5602       _ShutdownInstanceDisks(self, instance)
5603
5604
5605 class LUInstanceReinstall(LogicalUnit):
5606   """Reinstall an instance.
5607
5608   """
5609   HPATH = "instance-reinstall"
5610   HTYPE = constants.HTYPE_INSTANCE
5611   REQ_BGL = False
5612
5613   def ExpandNames(self):
5614     self._ExpandAndLockInstance()
5615
5616   def BuildHooksEnv(self):
5617     """Build hooks env.
5618
5619     This runs on master, primary and secondary nodes of the instance.
5620
5621     """
5622     return _BuildInstanceHookEnvByObject(self, self.instance)
5623
5624   def BuildHooksNodes(self):
5625     """Build hooks nodes.
5626
5627     """
5628     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5629     return (nl, nl)
5630
5631   def CheckPrereq(self):
5632     """Check prerequisites.
5633
5634     This checks that the instance is in the cluster and is not running.
5635
5636     """
5637     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5638     assert instance is not None, \
5639       "Cannot retrieve locked instance %s" % self.op.instance_name
5640     _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
5641                      " offline, cannot reinstall")
5642     for node in instance.secondary_nodes:
5643       _CheckNodeOnline(self, node, "Instance secondary node offline,"
5644                        " cannot reinstall")
5645
5646     if instance.disk_template == constants.DT_DISKLESS:
5647       raise errors.OpPrereqError("Instance '%s' has no disks" %
5648                                  self.op.instance_name,
5649                                  errors.ECODE_INVAL)
5650     _CheckInstanceDown(self, instance, "cannot reinstall")
5651
5652     if self.op.os_type is not None:
5653       # OS verification
5654       pnode = _ExpandNodeName(self.cfg, instance.primary_node)
5655       _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
5656       instance_os = self.op.os_type
5657     else:
5658       instance_os = instance.os
5659
5660     nodelist = list(instance.all_nodes)
5661
5662     if self.op.osparams:
5663       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
5664       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
5665       self.os_inst = i_osdict # the new dict (without defaults)
5666     else:
5667       self.os_inst = None
5668
5669     self.instance = instance
5670
5671   def Exec(self, feedback_fn):
5672     """Reinstall the instance.
5673
5674     """
5675     inst = self.instance
5676
5677     if self.op.os_type is not None:
5678       feedback_fn("Changing OS to '%s'..." % self.op.os_type)
5679       inst.os = self.op.os_type
5680       # Write to configuration
5681       self.cfg.Update(inst, feedback_fn)
5682
5683     _StartInstanceDisks(self, inst, None)
5684     try:
5685       feedback_fn("Running the instance OS create scripts...")
5686       # FIXME: pass debug option from opcode to backend
5687       result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
5688                                              self.op.debug_level,
5689                                              osparams=self.os_inst)
5690       result.Raise("Could not install OS for instance %s on node %s" %
5691                    (inst.name, inst.primary_node))
5692     finally:
5693       _ShutdownInstanceDisks(self, inst)
5694
5695
5696 class LUInstanceRecreateDisks(LogicalUnit):
5697   """Recreate an instance's missing disks.
5698
5699   """
5700   HPATH = "instance-recreate-disks"
5701   HTYPE = constants.HTYPE_INSTANCE
5702   REQ_BGL = False
5703
5704   def ExpandNames(self):
5705     self._ExpandAndLockInstance()
5706
5707   def BuildHooksEnv(self):
5708     """Build hooks env.
5709
5710     This runs on master, primary and secondary nodes of the instance.
5711
5712     """
5713     return _BuildInstanceHookEnvByObject(self, self.instance)
5714
5715   def BuildHooksNodes(self):
5716     """Build hooks nodes.
5717
5718     """
5719     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5720     return (nl, nl)
5721
5722   def CheckPrereq(self):
5723     """Check prerequisites.
5724
5725     This checks that the instance is in the cluster and is not running.
5726
5727     """
5728     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5729     assert instance is not None, \
5730       "Cannot retrieve locked instance %s" % self.op.instance_name
5731     _CheckNodeOnline(self, instance.primary_node)
5732
5733     if instance.disk_template == constants.DT_DISKLESS:
5734       raise errors.OpPrereqError("Instance '%s' has no disks" %
5735                                  self.op.instance_name, errors.ECODE_INVAL)
5736     _CheckInstanceDown(self, instance, "cannot recreate disks")
5737
5738     if not self.op.disks:
5739       self.op.disks = range(len(instance.disks))
5740     else:
5741       for idx in self.op.disks:
5742         if idx >= len(instance.disks):
5743           raise errors.OpPrereqError("Invalid disk index passed '%s'" % idx,
5744                                      errors.ECODE_INVAL)
5745
5746     self.instance = instance
5747
5748   def Exec(self, feedback_fn):
5749     """Recreate the disks.
5750
5751     """
5752     to_skip = []
5753     for idx, _ in enumerate(self.instance.disks):
5754       if idx not in self.op.disks: # disk idx has not been passed in
5755         to_skip.append(idx)
5756         continue
5757
5758     _CreateDisks(self, self.instance, to_skip=to_skip)
5759
5760
5761 class LUInstanceRename(LogicalUnit):
5762   """Rename an instance.
5763
5764   """
5765   HPATH = "instance-rename"
5766   HTYPE = constants.HTYPE_INSTANCE
5767
5768   def CheckArguments(self):
5769     """Check arguments.
5770
5771     """
5772     if self.op.ip_check and not self.op.name_check:
5773       # TODO: make the ip check more flexible and not depend on the name check
5774       raise errors.OpPrereqError("Cannot do ip check without a name check",
5775                                  errors.ECODE_INVAL)
5776
5777   def BuildHooksEnv(self):
5778     """Build hooks env.
5779
5780     This runs on master, primary and secondary nodes of the instance.
5781
5782     """
5783     env = _BuildInstanceHookEnvByObject(self, self.instance)
5784     env["INSTANCE_NEW_NAME"] = self.op.new_name
5785     return env
5786
5787   def BuildHooksNodes(self):
5788     """Build hooks nodes.
5789
5790     """
5791     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5792     return (nl, nl)
5793
5794   def CheckPrereq(self):
5795     """Check prerequisites.
5796
5797     This checks that the instance is in the cluster and is not running.
5798
5799     """
5800     self.op.instance_name = _ExpandInstanceName(self.cfg,
5801                                                 self.op.instance_name)
5802     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5803     assert instance is not None
5804     _CheckNodeOnline(self, instance.primary_node)
5805     _CheckInstanceDown(self, instance, "cannot rename")
5806     self.instance = instance
5807
5808     new_name = self.op.new_name
5809     if self.op.name_check:
5810       hostname = netutils.GetHostname(name=new_name)
5811       self.LogInfo("Resolved given name '%s' to '%s'", new_name,
5812                    hostname.name)
5813       if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
5814         raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
5815                                     " same as given hostname '%s'") %
5816                                     (hostname.name, self.op.new_name),
5817                                     errors.ECODE_INVAL)
5818       new_name = self.op.new_name = hostname.name
5819       if (self.op.ip_check and
5820           netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
5821         raise errors.OpPrereqError("IP %s of instance %s already in use" %
5822                                    (hostname.ip, new_name),
5823                                    errors.ECODE_NOTUNIQUE)
5824
5825     instance_list = self.cfg.GetInstanceList()
5826     if new_name in instance_list and new_name != instance.name:
5827       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
5828                                  new_name, errors.ECODE_EXISTS)
5829
5830   def Exec(self, feedback_fn):
5831     """Rename the instance.
5832
5833     """
5834     inst = self.instance
5835     old_name = inst.name
5836
5837     rename_file_storage = False
5838     if (inst.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE) and
5839         self.op.new_name != inst.name):
5840       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5841       rename_file_storage = True
5842
5843     self.cfg.RenameInstance(inst.name, self.op.new_name)
5844     # Change the instance lock. This is definitely safe while we hold the BGL.
5845     # Otherwise the new lock would have to be added in acquired mode.
5846     assert self.REQ_BGL
5847     self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
5848     self.context.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
5849
5850     # re-read the instance from the configuration after rename
5851     inst = self.cfg.GetInstanceInfo(self.op.new_name)
5852
5853     if rename_file_storage:
5854       new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
5855       result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
5856                                                      old_file_storage_dir,
5857                                                      new_file_storage_dir)
5858       result.Raise("Could not rename on node %s directory '%s' to '%s'"
5859                    " (but the instance has been renamed in Ganeti)" %
5860                    (inst.primary_node, old_file_storage_dir,
5861                     new_file_storage_dir))
5862
5863     _StartInstanceDisks(self, inst, None)
5864     try:
5865       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
5866                                                  old_name, self.op.debug_level)
5867       msg = result.fail_msg
5868       if msg:
5869         msg = ("Could not run OS rename script for instance %s on node %s"
5870                " (but the instance has been renamed in Ganeti): %s" %
5871                (inst.name, inst.primary_node, msg))
5872         self.proc.LogWarning(msg)
5873     finally:
5874       _ShutdownInstanceDisks(self, inst)
5875
5876     return inst.name
5877
5878
5879 class LUInstanceRemove(LogicalUnit):
5880   """Remove an instance.
5881
5882   """
5883   HPATH = "instance-remove"
5884   HTYPE = constants.HTYPE_INSTANCE
5885   REQ_BGL = False
5886
5887   def ExpandNames(self):
5888     self._ExpandAndLockInstance()
5889     self.needed_locks[locking.LEVEL_NODE] = []
5890     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5891
5892   def DeclareLocks(self, level):
5893     if level == locking.LEVEL_NODE:
5894       self._LockInstancesNodes()
5895
5896   def BuildHooksEnv(self):
5897     """Build hooks env.
5898
5899     This runs on master, primary and secondary nodes of the instance.
5900
5901     """
5902     env = _BuildInstanceHookEnvByObject(self, self.instance)
5903     env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
5904     return env
5905
5906   def BuildHooksNodes(self):
5907     """Build hooks nodes.
5908
5909     """
5910     nl = [self.cfg.GetMasterNode()]
5911     nl_post = list(self.instance.all_nodes) + nl
5912     return (nl, nl_post)
5913
5914   def CheckPrereq(self):
5915     """Check prerequisites.
5916
5917     This checks that the instance is in the cluster.
5918
5919     """
5920     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5921     assert self.instance is not None, \
5922       "Cannot retrieve locked instance %s" % self.op.instance_name
5923
5924   def Exec(self, feedback_fn):
5925     """Remove the instance.
5926
5927     """
5928     instance = self.instance
5929     logging.info("Shutting down instance %s on node %s",
5930                  instance.name, instance.primary_node)
5931
5932     result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
5933                                              self.op.shutdown_timeout)
5934     msg = result.fail_msg
5935     if msg:
5936       if self.op.ignore_failures:
5937         feedback_fn("Warning: can't shutdown instance: %s" % msg)
5938       else:
5939         raise errors.OpExecError("Could not shutdown instance %s on"
5940                                  " node %s: %s" %
5941                                  (instance.name, instance.primary_node, msg))
5942
5943     _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
5944
5945
5946 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
5947   """Utility function to remove an instance.
5948
5949   """
5950   logging.info("Removing block devices for instance %s", instance.name)
5951
5952   if not _RemoveDisks(lu, instance):
5953     if not ignore_failures:
5954       raise errors.OpExecError("Can't remove instance's disks")
5955     feedback_fn("Warning: can't remove instance's disks")
5956
5957   logging.info("Removing instance %s out of cluster config", instance.name)
5958
5959   lu.cfg.RemoveInstance(instance.name)
5960
5961   assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
5962     "Instance lock removal conflict"
5963
5964   # Remove lock for the instance
5965   lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5966
5967
5968 class LUInstanceQuery(NoHooksLU):
5969   """Logical unit for querying instances.
5970
5971   """
5972   # pylint: disable-msg=W0142
5973   REQ_BGL = False
5974
5975   def CheckArguments(self):
5976     self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
5977                              self.op.output_fields, self.op.use_locking)
5978
5979   def ExpandNames(self):
5980     self.iq.ExpandNames(self)
5981
5982   def DeclareLocks(self, level):
5983     self.iq.DeclareLocks(self, level)
5984
5985   def Exec(self, feedback_fn):
5986     return self.iq.OldStyleQuery(self)
5987
5988
5989 class LUInstanceFailover(LogicalUnit):
5990   """Failover an instance.
5991
5992   """
5993   HPATH = "instance-failover"
5994   HTYPE = constants.HTYPE_INSTANCE
5995   REQ_BGL = False
5996
5997   def CheckArguments(self):
5998     """Check the arguments.
5999
6000     """
6001     self.iallocator = getattr(self.op, "iallocator", None)
6002     self.target_node = getattr(self.op, "target_node", None)
6003
6004   def ExpandNames(self):
6005     self._ExpandAndLockInstance()
6006
6007     if self.op.target_node is not None:
6008       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6009
6010     self.needed_locks[locking.LEVEL_NODE] = []
6011     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6012
6013     ignore_consistency = self.op.ignore_consistency
6014     shutdown_timeout = self.op.shutdown_timeout
6015     self._migrater = TLMigrateInstance(self, self.op.instance_name,
6016                                        cleanup=False,
6017                                        iallocator=self.op.iallocator,
6018                                        target_node=self.op.target_node,
6019                                        failover=True,
6020                                        ignore_consistency=ignore_consistency,
6021                                        shutdown_timeout=shutdown_timeout)
6022     self.tasklets = [self._migrater]
6023
6024   def DeclareLocks(self, level):
6025     if level == locking.LEVEL_NODE:
6026       instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6027       if instance.disk_template in constants.DTS_EXT_MIRROR:
6028         if self.op.target_node is None:
6029           self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6030         else:
6031           self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6032                                                    self.op.target_node]
6033         del self.recalculate_locks[locking.LEVEL_NODE]
6034       else:
6035         self._LockInstancesNodes()
6036
6037   def BuildHooksEnv(self):
6038     """Build hooks env.
6039
6040     This runs on master, primary and secondary nodes of the instance.
6041
6042     """
6043     instance = self._migrater.instance
6044     source_node = instance.primary_node
6045     target_node = self._migrater.target_node
6046     env = {
6047       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6048       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6049       "OLD_PRIMARY": source_node,
6050       "NEW_PRIMARY": target_node,
6051       }
6052
6053     if instance.disk_template in constants.DTS_INT_MIRROR:
6054       env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6055       env["NEW_SECONDARY"] = source_node
6056     else:
6057       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6058
6059     env.update(_BuildInstanceHookEnvByObject(self, instance))
6060
6061     return env
6062
6063   def BuildHooksNodes(self):
6064     """Build hooks nodes.
6065
6066     """
6067     instance = self._migrater.instance
6068     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6069     return (nl, nl + [instance.primary_node])
6070
6071
6072 class LUInstanceMigrate(LogicalUnit):
6073   """Migrate an instance.
6074
6075   This is migration without shutting down, compared to the failover,
6076   which is done with shutdown.
6077
6078   """
6079   HPATH = "instance-migrate"
6080   HTYPE = constants.HTYPE_INSTANCE
6081   REQ_BGL = False
6082
6083   def ExpandNames(self):
6084     self._ExpandAndLockInstance()
6085
6086     if self.op.target_node is not None:
6087       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6088
6089     self.needed_locks[locking.LEVEL_NODE] = []
6090     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6091
6092     self._migrater = TLMigrateInstance(self, self.op.instance_name,
6093                                        cleanup=self.op.cleanup,
6094                                        iallocator=self.op.iallocator,
6095                                        target_node=self.op.target_node,
6096                                        failover=False,
6097                                        fallback=self.op.allow_failover)
6098     self.tasklets = [self._migrater]
6099
6100   def DeclareLocks(self, level):
6101     if level == locking.LEVEL_NODE:
6102       instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6103       if instance.disk_template in constants.DTS_EXT_MIRROR:
6104         if self.op.target_node is None:
6105           self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6106         else:
6107           self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6108                                                    self.op.target_node]
6109         del self.recalculate_locks[locking.LEVEL_NODE]
6110       else:
6111         self._LockInstancesNodes()
6112
6113   def BuildHooksEnv(self):
6114     """Build hooks env.
6115
6116     This runs on master, primary and secondary nodes of the instance.
6117
6118     """
6119     instance = self._migrater.instance
6120     source_node = instance.primary_node
6121     target_node = self._migrater.target_node
6122     env = _BuildInstanceHookEnvByObject(self, instance)
6123     env.update({
6124       "MIGRATE_LIVE": self._migrater.live,
6125       "MIGRATE_CLEANUP": self.op.cleanup,
6126       "OLD_PRIMARY": source_node,
6127       "NEW_PRIMARY": target_node,
6128       })
6129
6130     if instance.disk_template in constants.DTS_INT_MIRROR:
6131       env["OLD_SECONDARY"] = target_node
6132       env["NEW_SECONDARY"] = source_node
6133     else:
6134       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6135
6136     return env
6137
6138   def BuildHooksNodes(self):
6139     """Build hooks nodes.
6140
6141     """
6142     instance = self._migrater.instance
6143     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6144     return (nl, nl + [instance.primary_node])
6145
6146
6147 class LUInstanceMove(LogicalUnit):
6148   """Move an instance by data-copying.
6149
6150   """
6151   HPATH = "instance-move"
6152   HTYPE = constants.HTYPE_INSTANCE
6153   REQ_BGL = False
6154
6155   def ExpandNames(self):
6156     self._ExpandAndLockInstance()
6157     target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6158     self.op.target_node = target_node
6159     self.needed_locks[locking.LEVEL_NODE] = [target_node]
6160     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6161
6162   def DeclareLocks(self, level):
6163     if level == locking.LEVEL_NODE:
6164       self._LockInstancesNodes(primary_only=True)
6165
6166   def BuildHooksEnv(self):
6167     """Build hooks env.
6168
6169     This runs on master, primary and secondary nodes of the instance.
6170
6171     """
6172     env = {
6173       "TARGET_NODE": self.op.target_node,
6174       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6175       }
6176     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6177     return env
6178
6179   def BuildHooksNodes(self):
6180     """Build hooks nodes.
6181
6182     """
6183     nl = [
6184       self.cfg.GetMasterNode(),
6185       self.instance.primary_node,
6186       self.op.target_node,
6187       ]
6188     return (nl, nl)
6189
6190   def CheckPrereq(self):
6191     """Check prerequisites.
6192
6193     This checks that the instance is in the cluster.
6194
6195     """
6196     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6197     assert self.instance is not None, \
6198       "Cannot retrieve locked instance %s" % self.op.instance_name
6199
6200     node = self.cfg.GetNodeInfo(self.op.target_node)
6201     assert node is not None, \
6202       "Cannot retrieve locked node %s" % self.op.target_node
6203
6204     self.target_node = target_node = node.name
6205
6206     if target_node == instance.primary_node:
6207       raise errors.OpPrereqError("Instance %s is already on the node %s" %
6208                                  (instance.name, target_node),
6209                                  errors.ECODE_STATE)
6210
6211     bep = self.cfg.GetClusterInfo().FillBE(instance)
6212
6213     for idx, dsk in enumerate(instance.disks):
6214       if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6215         raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6216                                    " cannot copy" % idx, errors.ECODE_STATE)
6217
6218     _CheckNodeOnline(self, target_node)
6219     _CheckNodeNotDrained(self, target_node)
6220     _CheckNodeVmCapable(self, target_node)
6221
6222     if instance.admin_up:
6223       # check memory requirements on the secondary node
6224       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6225                            instance.name, bep[constants.BE_MEMORY],
6226                            instance.hypervisor)
6227     else:
6228       self.LogInfo("Not checking memory on the secondary node as"
6229                    " instance will not be started")
6230
6231     # check bridge existance
6232     _CheckInstanceBridgesExist(self, instance, node=target_node)
6233
6234   def Exec(self, feedback_fn):
6235     """Move an instance.
6236
6237     The move is done by shutting it down on its present node, copying
6238     the data over (slow) and starting it on the new node.
6239
6240     """
6241     instance = self.instance
6242
6243     source_node = instance.primary_node
6244     target_node = self.target_node
6245
6246     self.LogInfo("Shutting down instance %s on source node %s",
6247                  instance.name, source_node)
6248
6249     result = self.rpc.call_instance_shutdown(source_node, instance,
6250                                              self.op.shutdown_timeout)
6251     msg = result.fail_msg
6252     if msg:
6253       if self.op.ignore_consistency:
6254         self.proc.LogWarning("Could not shutdown instance %s on node %s."
6255                              " Proceeding anyway. Please make sure node"
6256                              " %s is down. Error details: %s",
6257                              instance.name, source_node, source_node, msg)
6258       else:
6259         raise errors.OpExecError("Could not shutdown instance %s on"
6260                                  " node %s: %s" %
6261                                  (instance.name, source_node, msg))
6262
6263     # create the target disks
6264     try:
6265       _CreateDisks(self, instance, target_node=target_node)
6266     except errors.OpExecError:
6267       self.LogWarning("Device creation failed, reverting...")
6268       try:
6269         _RemoveDisks(self, instance, target_node=target_node)
6270       finally:
6271         self.cfg.ReleaseDRBDMinors(instance.name)
6272         raise
6273
6274     cluster_name = self.cfg.GetClusterInfo().cluster_name
6275
6276     errs = []
6277     # activate, get path, copy the data over
6278     for idx, disk in enumerate(instance.disks):
6279       self.LogInfo("Copying data for disk %d", idx)
6280       result = self.rpc.call_blockdev_assemble(target_node, disk,
6281                                                instance.name, True, idx)
6282       if result.fail_msg:
6283         self.LogWarning("Can't assemble newly created disk %d: %s",
6284                         idx, result.fail_msg)
6285         errs.append(result.fail_msg)
6286         break
6287       dev_path = result.payload
6288       result = self.rpc.call_blockdev_export(source_node, disk,
6289                                              target_node, dev_path,
6290                                              cluster_name)
6291       if result.fail_msg:
6292         self.LogWarning("Can't copy data over for disk %d: %s",
6293                         idx, result.fail_msg)
6294         errs.append(result.fail_msg)
6295         break
6296
6297     if errs:
6298       self.LogWarning("Some disks failed to copy, aborting")
6299       try:
6300         _RemoveDisks(self, instance, target_node=target_node)
6301       finally:
6302         self.cfg.ReleaseDRBDMinors(instance.name)
6303         raise errors.OpExecError("Errors during disk copy: %s" %
6304                                  (",".join(errs),))
6305
6306     instance.primary_node = target_node
6307     self.cfg.Update(instance, feedback_fn)
6308
6309     self.LogInfo("Removing the disks on the original node")
6310     _RemoveDisks(self, instance, target_node=source_node)
6311
6312     # Only start the instance if it's marked as up
6313     if instance.admin_up:
6314       self.LogInfo("Starting instance %s on node %s",
6315                    instance.name, target_node)
6316
6317       disks_ok, _ = _AssembleInstanceDisks(self, instance,
6318                                            ignore_secondaries=True)
6319       if not disks_ok:
6320         _ShutdownInstanceDisks(self, instance)
6321         raise errors.OpExecError("Can't activate the instance's disks")
6322
6323       result = self.rpc.call_instance_start(target_node, instance, None, None)
6324       msg = result.fail_msg
6325       if msg:
6326         _ShutdownInstanceDisks(self, instance)
6327         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6328                                  (instance.name, target_node, msg))
6329
6330
6331 class LUNodeMigrate(LogicalUnit):
6332   """Migrate all instances from a node.
6333
6334   """
6335   HPATH = "node-migrate"
6336   HTYPE = constants.HTYPE_NODE
6337   REQ_BGL = False
6338
6339   def CheckArguments(self):
6340     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
6341
6342   def ExpandNames(self):
6343     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6344
6345     self.needed_locks = {}
6346
6347     # Create tasklets for migrating instances for all instances on this node
6348     names = []
6349     tasklets = []
6350
6351     self.lock_all_nodes = False
6352
6353     for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
6354       logging.debug("Migrating instance %s", inst.name)
6355       names.append(inst.name)
6356
6357       tasklets.append(TLMigrateInstance(self, inst.name, cleanup=False,
6358                                         iallocator=self.op.iallocator,
6359                                         taget_node=None))
6360
6361       if inst.disk_template in constants.DTS_EXT_MIRROR:
6362         # We need to lock all nodes, as the iallocator will choose the
6363         # destination nodes afterwards
6364         self.lock_all_nodes = True
6365
6366     self.tasklets = tasklets
6367
6368     # Declare node locks
6369     if self.lock_all_nodes:
6370       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6371     else:
6372       self.needed_locks[locking.LEVEL_NODE] = [self.op.node_name]
6373       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6374
6375     # Declare instance locks
6376     self.needed_locks[locking.LEVEL_INSTANCE] = names
6377
6378   def DeclareLocks(self, level):
6379     if level == locking.LEVEL_NODE and not self.lock_all_nodes:
6380       self._LockInstancesNodes()
6381
6382   def BuildHooksEnv(self):
6383     """Build hooks env.
6384
6385     This runs on the master, the primary and all the secondaries.
6386
6387     """
6388     return {
6389       "NODE_NAME": self.op.node_name,
6390       }
6391
6392   def BuildHooksNodes(self):
6393     """Build hooks nodes.
6394
6395     """
6396     nl = [self.cfg.GetMasterNode()]
6397     return (nl, nl)
6398
6399
6400 class TLMigrateInstance(Tasklet):
6401   """Tasklet class for instance migration.
6402
6403   @type live: boolean
6404   @ivar live: whether the migration will be done live or non-live;
6405       this variable is initalized only after CheckPrereq has run
6406   @type cleanup: boolean
6407   @ivar cleanup: Wheater we cleanup from a failed migration
6408   @type iallocator: string
6409   @ivar iallocator: The iallocator used to determine target_node
6410   @type target_node: string
6411   @ivar target_node: If given, the target_node to reallocate the instance to
6412   @type failover: boolean
6413   @ivar failover: Whether operation results in failover or migration
6414   @type fallback: boolean
6415   @ivar fallback: Whether fallback to failover is allowed if migration not
6416                   possible
6417   @type ignore_consistency: boolean
6418   @ivar ignore_consistency: Wheter we should ignore consistency between source
6419                             and target node
6420   @type shutdown_timeout: int
6421   @ivar shutdown_timeout: In case of failover timeout of the shutdown
6422
6423   """
6424   def __init__(self, lu, instance_name, cleanup=False, iallocator=None,
6425                target_node=None, failover=False, fallback=False,
6426                ignore_consistency=False,
6427                shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
6428     """Initializes this class.
6429
6430     """
6431     Tasklet.__init__(self, lu)
6432
6433     # Parameters
6434     self.instance_name = instance_name
6435     self.cleanup = cleanup
6436     self.live = False # will be overridden later
6437     self.iallocator = iallocator
6438     self.target_node = target_node
6439     self.failover = failover
6440     self.fallback = fallback
6441     self.ignore_consistency = ignore_consistency
6442     self.shutdown_timeout = shutdown_timeout
6443
6444   def CheckPrereq(self):
6445     """Check prerequisites.
6446
6447     This checks that the instance is in the cluster.
6448
6449     """
6450     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6451     instance = self.cfg.GetInstanceInfo(instance_name)
6452     assert instance is not None
6453     self.instance = instance
6454
6455     if (not self.cleanup and not instance.admin_up and not self.failover and
6456         self.fallback):
6457       self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
6458                       " to failover")
6459       self.failover = True
6460
6461     if instance.disk_template not in constants.DTS_MIRRORED:
6462       if self.failover:
6463         text = "failovers"
6464       else:
6465         text = "migrations"
6466       raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
6467                                  " %s" % (instance.disk_template, text),
6468                                  errors.ECODE_STATE)
6469
6470     if instance.disk_template in constants.DTS_EXT_MIRROR:
6471       _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
6472
6473       if self.iallocator:
6474         self._RunAllocator()
6475
6476       # self.target_node is already populated, either directly or by the
6477       # iallocator run
6478       target_node = self.target_node
6479
6480       if len(self.lu.tasklets) == 1:
6481         # It is safe to release locks only when we're the only tasklet in the LU
6482         _ReleaseLocks(self, locking.LEVEL_NODE,
6483                       keep=[instance.primary_node, self.target_node])
6484
6485     else:
6486       secondary_nodes = instance.secondary_nodes
6487       if not secondary_nodes:
6488         raise errors.ConfigurationError("No secondary node but using"
6489                                         " %s disk template" %
6490                                         instance.disk_template)
6491       target_node = secondary_nodes[0]
6492       if self.iallocator or (self.target_node and
6493                              self.target_node != target_node):
6494         if self.failover:
6495           text = "failed over"
6496         else:
6497           text = "migrated"
6498         raise errors.OpPrereqError("Instances with disk template %s cannot"
6499                                    " be %s to arbitrary nodes"
6500                                    " (neither an iallocator nor a target"
6501                                    " node can be passed)" %
6502                                    (instance.disk_template, text),
6503                                    errors.ECODE_INVAL)
6504
6505     i_be = self.cfg.GetClusterInfo().FillBE(instance)
6506
6507     # check memory requirements on the secondary node
6508     if not self.failover or instance.admin_up:
6509       _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6510                            instance.name, i_be[constants.BE_MEMORY],
6511                            instance.hypervisor)
6512     else:
6513       self.lu.LogInfo("Not checking memory on the secondary node as"
6514                       " instance will not be started")
6515
6516     # check bridge existance
6517     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6518
6519     if not self.cleanup:
6520       _CheckNodeNotDrained(self.lu, target_node)
6521       if not self.failover:
6522         result = self.rpc.call_instance_migratable(instance.primary_node,
6523                                                    instance)
6524         if result.fail_msg and self.fallback:
6525           self.lu.LogInfo("Can't migrate, instance offline, fallback to"
6526                           " failover")
6527           self.failover = True
6528         else:
6529           result.Raise("Can't migrate, please use failover",
6530                        prereq=True, ecode=errors.ECODE_STATE)
6531
6532     assert not (self.failover and self.cleanup)
6533
6534   def _RunAllocator(self):
6535     """Run the allocator based on input opcode.
6536
6537     """
6538     ial = IAllocator(self.cfg, self.rpc,
6539                      mode=constants.IALLOCATOR_MODE_RELOC,
6540                      name=self.instance_name,
6541                      # TODO See why hail breaks with a single node below
6542                      relocate_from=[self.instance.primary_node,
6543                                     self.instance.primary_node],
6544                      )
6545
6546     ial.Run(self.iallocator)
6547
6548     if not ial.success:
6549       raise errors.OpPrereqError("Can't compute nodes using"
6550                                  " iallocator '%s': %s" %
6551                                  (self.iallocator, ial.info),
6552                                  errors.ECODE_NORES)
6553     if len(ial.result) != ial.required_nodes:
6554       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6555                                  " of nodes (%s), required %s" %
6556                                  (self.iallocator, len(ial.result),
6557                                   ial.required_nodes), errors.ECODE_FAULT)
6558     self.target_node = ial.result[0]
6559     self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6560                  self.instance_name, self.iallocator,
6561                  utils.CommaJoin(ial.result))
6562
6563     if not self.failover:
6564       if self.lu.op.live is not None and self.lu.op.mode is not None:
6565         raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6566                                    " parameters are accepted",
6567                                    errors.ECODE_INVAL)
6568       if self.lu.op.live is not None:
6569         if self.lu.op.live:
6570           self.lu.op.mode = constants.HT_MIGRATION_LIVE
6571         else:
6572           self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6573         # reset the 'live' parameter to None so that repeated
6574         # invocations of CheckPrereq do not raise an exception
6575         self.lu.op.live = None
6576       elif self.lu.op.mode is None:
6577         # read the default value from the hypervisor
6578         i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
6579                                                 skip_globals=False)
6580         self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6581
6582       self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6583     else:
6584       # Failover is never live
6585       self.live = False
6586
6587   def _WaitUntilSync(self):
6588     """Poll with custom rpc for disk sync.
6589
6590     This uses our own step-based rpc call.
6591
6592     """
6593     self.feedback_fn("* wait until resync is done")
6594     all_done = False
6595     while not all_done:
6596       all_done = True
6597       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6598                                             self.nodes_ip,
6599                                             self.instance.disks)
6600       min_percent = 100
6601       for node, nres in result.items():
6602         nres.Raise("Cannot resync disks on node %s" % node)
6603         node_done, node_percent = nres.payload
6604         all_done = all_done and node_done
6605         if node_percent is not None:
6606           min_percent = min(min_percent, node_percent)
6607       if not all_done:
6608         if min_percent < 100:
6609           self.feedback_fn("   - progress: %.1f%%" % min_percent)
6610         time.sleep(2)
6611
6612   def _EnsureSecondary(self, node):
6613     """Demote a node to secondary.
6614
6615     """
6616     self.feedback_fn("* switching node %s to secondary mode" % node)
6617
6618     for dev in self.instance.disks:
6619       self.cfg.SetDiskID(dev, node)
6620
6621     result = self.rpc.call_blockdev_close(node, self.instance.name,
6622                                           self.instance.disks)
6623     result.Raise("Cannot change disk to secondary on node %s" % node)
6624
6625   def _GoStandalone(self):
6626     """Disconnect from the network.
6627
6628     """
6629     self.feedback_fn("* changing into standalone mode")
6630     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6631                                                self.instance.disks)
6632     for node, nres in result.items():
6633       nres.Raise("Cannot disconnect disks node %s" % node)
6634
6635   def _GoReconnect(self, multimaster):
6636     """Reconnect to the network.
6637
6638     """
6639     if multimaster:
6640       msg = "dual-master"
6641     else:
6642       msg = "single-master"
6643     self.feedback_fn("* changing disks into %s mode" % msg)
6644     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6645                                            self.instance.disks,
6646                                            self.instance.name, multimaster)
6647     for node, nres in result.items():
6648       nres.Raise("Cannot change disks config on node %s" % node)
6649
6650   def _ExecCleanup(self):
6651     """Try to cleanup after a failed migration.
6652
6653     The cleanup is done by:
6654       - check that the instance is running only on one node
6655         (and update the config if needed)
6656       - change disks on its secondary node to secondary
6657       - wait until disks are fully synchronized
6658       - disconnect from the network
6659       - change disks into single-master mode
6660       - wait again until disks are fully synchronized
6661
6662     """
6663     instance = self.instance
6664     target_node = self.target_node
6665     source_node = self.source_node
6666
6667     # check running on only one node
6668     self.feedback_fn("* checking where the instance actually runs"
6669                      " (if this hangs, the hypervisor might be in"
6670                      " a bad state)")
6671     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6672     for node, result in ins_l.items():
6673       result.Raise("Can't contact node %s" % node)
6674
6675     runningon_source = instance.name in ins_l[source_node].payload
6676     runningon_target = instance.name in ins_l[target_node].payload
6677
6678     if runningon_source and runningon_target:
6679       raise errors.OpExecError("Instance seems to be running on two nodes,"
6680                                " or the hypervisor is confused. You will have"
6681                                " to ensure manually that it runs only on one"
6682                                " and restart this operation.")
6683
6684     if not (runningon_source or runningon_target):
6685       raise errors.OpExecError("Instance does not seem to be running at all."
6686                                " In this case, it's safer to repair by"
6687                                " running 'gnt-instance stop' to ensure disk"
6688                                " shutdown, and then restarting it.")
6689
6690     if runningon_target:
6691       # the migration has actually succeeded, we need to update the config
6692       self.feedback_fn("* instance running on secondary node (%s),"
6693                        " updating config" % target_node)
6694       instance.primary_node = target_node
6695       self.cfg.Update(instance, self.feedback_fn)
6696       demoted_node = source_node
6697     else:
6698       self.feedback_fn("* instance confirmed to be running on its"
6699                        " primary node (%s)" % source_node)
6700       demoted_node = target_node
6701
6702     if instance.disk_template in constants.DTS_INT_MIRROR:
6703       self._EnsureSecondary(demoted_node)
6704       try:
6705         self._WaitUntilSync()
6706       except errors.OpExecError:
6707         # we ignore here errors, since if the device is standalone, it
6708         # won't be able to sync
6709         pass
6710       self._GoStandalone()
6711       self._GoReconnect(False)
6712       self._WaitUntilSync()
6713
6714     self.feedback_fn("* done")
6715
6716   def _RevertDiskStatus(self):
6717     """Try to revert the disk status after a failed migration.
6718
6719     """
6720     target_node = self.target_node
6721     if self.instance.disk_template in constants.DTS_EXT_MIRROR:
6722       return
6723
6724     try:
6725       self._EnsureSecondary(target_node)
6726       self._GoStandalone()
6727       self._GoReconnect(False)
6728       self._WaitUntilSync()
6729     except errors.OpExecError, err:
6730       self.lu.LogWarning("Migration failed and I can't reconnect the"
6731                          " drives: error '%s'\n"
6732                          "Please look and recover the instance status" %
6733                          str(err))
6734
6735   def _AbortMigration(self):
6736     """Call the hypervisor code to abort a started migration.
6737
6738     """
6739     instance = self.instance
6740     target_node = self.target_node
6741     migration_info = self.migration_info
6742
6743     abort_result = self.rpc.call_finalize_migration(target_node,
6744                                                     instance,
6745                                                     migration_info,
6746                                                     False)
6747     abort_msg = abort_result.fail_msg
6748     if abort_msg:
6749       logging.error("Aborting migration failed on target node %s: %s",
6750                     target_node, abort_msg)
6751       # Don't raise an exception here, as we stil have to try to revert the
6752       # disk status, even if this step failed.
6753
6754   def _ExecMigration(self):
6755     """Migrate an instance.
6756
6757     The migrate is done by:
6758       - change the disks into dual-master mode
6759       - wait until disks are fully synchronized again
6760       - migrate the instance
6761       - change disks on the new secondary node (the old primary) to secondary
6762       - wait until disks are fully synchronized
6763       - change disks into single-master mode
6764
6765     """
6766     instance = self.instance
6767     target_node = self.target_node
6768     source_node = self.source_node
6769
6770     self.feedback_fn("* checking disk consistency between source and target")
6771     for dev in instance.disks:
6772       if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6773         raise errors.OpExecError("Disk %s is degraded or not fully"
6774                                  " synchronized on target node,"
6775                                  " aborting migrate." % dev.iv_name)
6776
6777     # First get the migration information from the remote node
6778     result = self.rpc.call_migration_info(source_node, instance)
6779     msg = result.fail_msg
6780     if msg:
6781       log_err = ("Failed fetching source migration information from %s: %s" %
6782                  (source_node, msg))
6783       logging.error(log_err)
6784       raise errors.OpExecError(log_err)
6785
6786     self.migration_info = migration_info = result.payload
6787
6788     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
6789       # Then switch the disks to master/master mode
6790       self._EnsureSecondary(target_node)
6791       self._GoStandalone()
6792       self._GoReconnect(True)
6793       self._WaitUntilSync()
6794
6795     self.feedback_fn("* preparing %s to accept the instance" % target_node)
6796     result = self.rpc.call_accept_instance(target_node,
6797                                            instance,
6798                                            migration_info,
6799                                            self.nodes_ip[target_node])
6800
6801     msg = result.fail_msg
6802     if msg:
6803       logging.error("Instance pre-migration failed, trying to revert"
6804                     " disk status: %s", msg)
6805       self.feedback_fn("Pre-migration failed, aborting")
6806       self._AbortMigration()
6807       self._RevertDiskStatus()
6808       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6809                                (instance.name, msg))
6810
6811     self.feedback_fn("* migrating instance to %s" % target_node)
6812     result = self.rpc.call_instance_migrate(source_node, instance,
6813                                             self.nodes_ip[target_node],
6814                                             self.live)
6815     msg = result.fail_msg
6816     if msg:
6817       logging.error("Instance migration failed, trying to revert"
6818                     " disk status: %s", msg)
6819       self.feedback_fn("Migration failed, aborting")
6820       self._AbortMigration()
6821       self._RevertDiskStatus()
6822       raise errors.OpExecError("Could not migrate instance %s: %s" %
6823                                (instance.name, msg))
6824
6825     instance.primary_node = target_node
6826     # distribute new instance config to the other nodes
6827     self.cfg.Update(instance, self.feedback_fn)
6828
6829     result = self.rpc.call_finalize_migration(target_node,
6830                                               instance,
6831                                               migration_info,
6832                                               True)
6833     msg = result.fail_msg
6834     if msg:
6835       logging.error("Instance migration succeeded, but finalization failed:"
6836                     " %s", msg)
6837       raise errors.OpExecError("Could not finalize instance migration: %s" %
6838                                msg)
6839
6840     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
6841       self._EnsureSecondary(source_node)
6842       self._WaitUntilSync()
6843       self._GoStandalone()
6844       self._GoReconnect(False)
6845       self._WaitUntilSync()
6846
6847     self.feedback_fn("* done")
6848
6849   def _ExecFailover(self):
6850     """Failover an instance.
6851
6852     The failover is done by shutting it down on its present node and
6853     starting it on the secondary.
6854
6855     """
6856     instance = self.instance
6857     primary_node = self.cfg.GetNodeInfo(instance.primary_node)
6858
6859     source_node = instance.primary_node
6860     target_node = self.target_node
6861
6862     if instance.admin_up:
6863       self.feedback_fn("* checking disk consistency between source and target")
6864       for dev in instance.disks:
6865         # for drbd, these are drbd over lvm
6866         if not _CheckDiskConsistency(self, dev, target_node, False):
6867           if not self.ignore_consistency:
6868             raise errors.OpExecError("Disk %s is degraded on target node,"
6869                                      " aborting failover." % dev.iv_name)
6870     else:
6871       self.feedback_fn("* not checking disk consistency as instance is not"
6872                        " running")
6873
6874     self.feedback_fn("* shutting down instance on source node")
6875     logging.info("Shutting down instance %s on node %s",
6876                  instance.name, source_node)
6877
6878     result = self.rpc.call_instance_shutdown(source_node, instance,
6879                                              self.shutdown_timeout)
6880     msg = result.fail_msg
6881     if msg:
6882       if self.ignore_consistency or primary_node.offline:
6883         self.lu.LogWarning("Could not shutdown instance %s on node %s."
6884                            " Proceeding anyway. Please make sure node"
6885                            " %s is down. Error details: %s",
6886                            instance.name, source_node, source_node, msg)
6887       else:
6888         raise errors.OpExecError("Could not shutdown instance %s on"
6889                                  " node %s: %s" %
6890                                  (instance.name, source_node, msg))
6891
6892     self.feedback_fn("* deactivating the instance's disks on source node")
6893     if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
6894       raise errors.OpExecError("Can't shut down the instance's disks.")
6895
6896     instance.primary_node = target_node
6897     # distribute new instance config to the other nodes
6898     self.cfg.Update(instance, self.feedback_fn)
6899
6900     # Only start the instance if it's marked as up
6901     if instance.admin_up:
6902       self.feedback_fn("* activating the instance's disks on target node")
6903       logging.info("Starting instance %s on node %s",
6904                    instance.name, target_node)
6905
6906       disks_ok, _ = _AssembleInstanceDisks(self, instance,
6907                                            ignore_secondaries=True)
6908       if not disks_ok:
6909         _ShutdownInstanceDisks(self, instance)
6910         raise errors.OpExecError("Can't activate the instance's disks")
6911
6912       self.feedback_fn("* starting the instance on the target node")
6913       result = self.rpc.call_instance_start(target_node, instance, None, None)
6914       msg = result.fail_msg
6915       if msg:
6916         _ShutdownInstanceDisks(self, instance)
6917         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6918                                  (instance.name, target_node, msg))
6919
6920   def Exec(self, feedback_fn):
6921     """Perform the migration.
6922
6923     """
6924     self.feedback_fn = feedback_fn
6925     self.source_node = self.instance.primary_node
6926
6927     # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
6928     if self.instance.disk_template in constants.DTS_INT_MIRROR:
6929       self.target_node = self.instance.secondary_nodes[0]
6930       # Otherwise self.target_node has been populated either
6931       # directly, or through an iallocator.
6932
6933     self.all_nodes = [self.source_node, self.target_node]
6934     self.nodes_ip = {
6935       self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6936       self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6937       }
6938
6939     if self.failover:
6940       feedback_fn("Failover instance %s" % self.instance.name)
6941       self._ExecFailover()
6942     else:
6943       feedback_fn("Migrating instance %s" % self.instance.name)
6944
6945       if self.cleanup:
6946         return self._ExecCleanup()
6947       else:
6948         return self._ExecMigration()
6949
6950
6951 def _CreateBlockDev(lu, node, instance, device, force_create,
6952                     info, force_open):
6953   """Create a tree of block devices on a given node.
6954
6955   If this device type has to be created on secondaries, create it and
6956   all its children.
6957
6958   If not, just recurse to children keeping the same 'force' value.
6959
6960   @param lu: the lu on whose behalf we execute
6961   @param node: the node on which to create the device
6962   @type instance: L{objects.Instance}
6963   @param instance: the instance which owns the device
6964   @type device: L{objects.Disk}
6965   @param device: the device to create
6966   @type force_create: boolean
6967   @param force_create: whether to force creation of this device; this
6968       will be change to True whenever we find a device which has
6969       CreateOnSecondary() attribute
6970   @param info: the extra 'metadata' we should attach to the device
6971       (this will be represented as a LVM tag)
6972   @type force_open: boolean
6973   @param force_open: this parameter will be passes to the
6974       L{backend.BlockdevCreate} function where it specifies
6975       whether we run on primary or not, and it affects both
6976       the child assembly and the device own Open() execution
6977
6978   """
6979   if device.CreateOnSecondary():
6980     force_create = True
6981
6982   if device.children:
6983     for child in device.children:
6984       _CreateBlockDev(lu, node, instance, child, force_create,
6985                       info, force_open)
6986
6987   if not force_create:
6988     return
6989
6990   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6991
6992
6993 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6994   """Create a single block device on a given node.
6995
6996   This will not recurse over children of the device, so they must be
6997   created in advance.
6998
6999   @param lu: the lu on whose behalf we execute
7000   @param node: the node on which to create the device
7001   @type instance: L{objects.Instance}
7002   @param instance: the instance which owns the device
7003   @type device: L{objects.Disk}
7004   @param device: the device to create
7005   @param info: the extra 'metadata' we should attach to the device
7006       (this will be represented as a LVM tag)
7007   @type force_open: boolean
7008   @param force_open: this parameter will be passes to the
7009       L{backend.BlockdevCreate} function where it specifies
7010       whether we run on primary or not, and it affects both
7011       the child assembly and the device own Open() execution
7012
7013   """
7014   lu.cfg.SetDiskID(device, node)
7015   result = lu.rpc.call_blockdev_create(node, device, device.size,
7016                                        instance.name, force_open, info)
7017   result.Raise("Can't create block device %s on"
7018                " node %s for instance %s" % (device, node, instance.name))
7019   if device.physical_id is None:
7020     device.physical_id = result.payload
7021
7022
7023 def _GenerateUniqueNames(lu, exts):
7024   """Generate a suitable LV name.
7025
7026   This will generate a logical volume name for the given instance.
7027
7028   """
7029   results = []
7030   for val in exts:
7031     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7032     results.append("%s%s" % (new_id, val))
7033   return results
7034
7035
7036 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7037                          iv_name, p_minor, s_minor):
7038   """Generate a drbd8 device complete with its children.
7039
7040   """
7041   assert len(vgnames) == len(names) == 2
7042   port = lu.cfg.AllocatePort()
7043   shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7044   dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7045                           logical_id=(vgnames[0], names[0]))
7046   dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7047                           logical_id=(vgnames[1], names[1]))
7048   drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7049                           logical_id=(primary, secondary, port,
7050                                       p_minor, s_minor,
7051                                       shared_secret),
7052                           children=[dev_data, dev_meta],
7053                           iv_name=iv_name)
7054   return drbd_dev
7055
7056
7057 def _GenerateDiskTemplate(lu, template_name,
7058                           instance_name, primary_node,
7059                           secondary_nodes, disk_info,
7060                           file_storage_dir, file_driver,
7061                           base_index, feedback_fn):
7062   """Generate the entire disk layout for a given template type.
7063
7064   """
7065   #TODO: compute space requirements
7066
7067   vgname = lu.cfg.GetVGName()
7068   disk_count = len(disk_info)
7069   disks = []
7070   if template_name == constants.DT_DISKLESS:
7071     pass
7072   elif template_name == constants.DT_PLAIN:
7073     if len(secondary_nodes) != 0:
7074       raise errors.ProgrammerError("Wrong template configuration")
7075
7076     names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7077                                       for i in range(disk_count)])
7078     for idx, disk in enumerate(disk_info):
7079       disk_index = idx + base_index
7080       vg = disk.get(constants.IDISK_VG, vgname)
7081       feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7082       disk_dev = objects.Disk(dev_type=constants.LD_LV,
7083                               size=disk[constants.IDISK_SIZE],
7084                               logical_id=(vg, names[idx]),
7085                               iv_name="disk/%d" % disk_index,
7086                               mode=disk[constants.IDISK_MODE])
7087       disks.append(disk_dev)
7088   elif template_name == constants.DT_DRBD8:
7089     if len(secondary_nodes) != 1:
7090       raise errors.ProgrammerError("Wrong template configuration")
7091     remote_node = secondary_nodes[0]
7092     minors = lu.cfg.AllocateDRBDMinor(
7093       [primary_node, remote_node] * len(disk_info), instance_name)
7094
7095     names = []
7096     for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7097                                                for i in range(disk_count)]):
7098       names.append(lv_prefix + "_data")
7099       names.append(lv_prefix + "_meta")
7100     for idx, disk in enumerate(disk_info):
7101       disk_index = idx + base_index
7102       data_vg = disk.get(constants.IDISK_VG, vgname)
7103       meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7104       disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7105                                       disk[constants.IDISK_SIZE],
7106                                       [data_vg, meta_vg],
7107                                       names[idx * 2:idx * 2 + 2],
7108                                       "disk/%d" % disk_index,
7109                                       minors[idx * 2], minors[idx * 2 + 1])
7110       disk_dev.mode = disk[constants.IDISK_MODE]
7111       disks.append(disk_dev)
7112   elif template_name == constants.DT_FILE:
7113     if len(secondary_nodes) != 0:
7114       raise errors.ProgrammerError("Wrong template configuration")
7115
7116     opcodes.RequireFileStorage()
7117
7118     for idx, disk in enumerate(disk_info):
7119       disk_index = idx + base_index
7120       disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7121                               size=disk[constants.IDISK_SIZE],
7122                               iv_name="disk/%d" % disk_index,
7123                               logical_id=(file_driver,
7124                                           "%s/disk%d" % (file_storage_dir,
7125                                                          disk_index)),
7126                               mode=disk[constants.IDISK_MODE])
7127       disks.append(disk_dev)
7128   elif template_name == constants.DT_SHARED_FILE:
7129     if len(secondary_nodes) != 0:
7130       raise errors.ProgrammerError("Wrong template configuration")
7131
7132     opcodes.RequireSharedFileStorage()
7133
7134     for idx, disk in enumerate(disk_info):
7135       disk_index = idx + base_index
7136       disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7137                               size=disk[constants.IDISK_SIZE],
7138                               iv_name="disk/%d" % disk_index,
7139                               logical_id=(file_driver,
7140                                           "%s/disk%d" % (file_storage_dir,
7141                                                          disk_index)),
7142                               mode=disk[constants.IDISK_MODE])
7143       disks.append(disk_dev)
7144   elif template_name == constants.DT_BLOCK:
7145     if len(secondary_nodes) != 0:
7146       raise errors.ProgrammerError("Wrong template configuration")
7147
7148     for idx, disk in enumerate(disk_info):
7149       disk_index = idx + base_index
7150       disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7151                               size=disk[constants.IDISK_SIZE],
7152                               logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7153                                           disk[constants.IDISK_ADOPT]),
7154                               iv_name="disk/%d" % disk_index,
7155                               mode=disk[constants.IDISK_MODE])
7156       disks.append(disk_dev)
7157
7158   else:
7159     raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7160   return disks
7161
7162
7163 def _GetInstanceInfoText(instance):
7164   """Compute that text that should be added to the disk's metadata.
7165
7166   """
7167   return "originstname+%s" % instance.name
7168
7169
7170 def _CalcEta(time_taken, written, total_size):
7171   """Calculates the ETA based on size written and total size.
7172
7173   @param time_taken: The time taken so far
7174   @param written: amount written so far
7175   @param total_size: The total size of data to be written
7176   @return: The remaining time in seconds
7177
7178   """
7179   avg_time = time_taken / float(written)
7180   return (total_size - written) * avg_time
7181
7182
7183 def _WipeDisks(lu, instance):
7184   """Wipes instance disks.
7185
7186   @type lu: L{LogicalUnit}
7187   @param lu: the logical unit on whose behalf we execute
7188   @type instance: L{objects.Instance}
7189   @param instance: the instance whose disks we should create
7190   @return: the success of the wipe
7191
7192   """
7193   node = instance.primary_node
7194
7195   for device in instance.disks:
7196     lu.cfg.SetDiskID(device, node)
7197
7198   logging.info("Pause sync of instance %s disks", instance.name)
7199   result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7200
7201   for idx, success in enumerate(result.payload):
7202     if not success:
7203       logging.warn("pause-sync of instance %s for disks %d failed",
7204                    instance.name, idx)
7205
7206   try:
7207     for idx, device in enumerate(instance.disks):
7208       # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7209       # MAX_WIPE_CHUNK at max
7210       wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7211                             constants.MIN_WIPE_CHUNK_PERCENT)
7212       # we _must_ make this an int, otherwise rounding errors will
7213       # occur
7214       wipe_chunk_size = int(wipe_chunk_size)
7215
7216       lu.LogInfo("* Wiping disk %d", idx)
7217       logging.info("Wiping disk %d for instance %s, node %s using"
7218                    " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7219
7220       offset = 0
7221       size = device.size
7222       last_output = 0
7223       start_time = time.time()
7224
7225       while offset < size:
7226         wipe_size = min(wipe_chunk_size, size - offset)
7227         logging.debug("Wiping disk %d, offset %s, chunk %s",
7228                       idx, offset, wipe_size)
7229         result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7230         result.Raise("Could not wipe disk %d at offset %d for size %d" %
7231                      (idx, offset, wipe_size))
7232         now = time.time()
7233         offset += wipe_size
7234         if now - last_output >= 60:
7235           eta = _CalcEta(now - start_time, offset, size)
7236           lu.LogInfo(" - done: %.1f%% ETA: %s" %
7237                      (offset / float(size) * 100, utils.FormatSeconds(eta)))
7238           last_output = now
7239   finally:
7240     logging.info("Resume sync of instance %s disks", instance.name)
7241
7242     result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7243
7244     for idx, success in enumerate(result.payload):
7245       if not success:
7246         lu.LogWarning("Warning: Resume sync of disk %d failed. Please have a"
7247                       " look at the status and troubleshoot the issue.", idx)
7248         logging.warn("resume-sync of instance %s for disks %d failed",
7249                      instance.name, idx)
7250
7251
7252 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7253   """Create all disks for an instance.
7254
7255   This abstracts away some work from AddInstance.
7256
7257   @type lu: L{LogicalUnit}
7258   @param lu: the logical unit on whose behalf we execute
7259   @type instance: L{objects.Instance}
7260   @param instance: the instance whose disks we should create
7261   @type to_skip: list
7262   @param to_skip: list of indices to skip
7263   @type target_node: string
7264   @param target_node: if passed, overrides the target node for creation
7265   @rtype: boolean
7266   @return: the success of the creation
7267
7268   """
7269   info = _GetInstanceInfoText(instance)
7270   if target_node is None:
7271     pnode = instance.primary_node
7272     all_nodes = instance.all_nodes
7273   else:
7274     pnode = target_node
7275     all_nodes = [pnode]
7276
7277   if instance.disk_template in (constants.DT_FILE, constants.DT_SHARED_FILE):
7278     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7279     result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7280
7281     result.Raise("Failed to create directory '%s' on"
7282                  " node %s" % (file_storage_dir, pnode))
7283
7284   # Note: this needs to be kept in sync with adding of disks in
7285   # LUInstanceSetParams
7286   for idx, device in enumerate(instance.disks):
7287     if to_skip and idx in to_skip:
7288       continue
7289     logging.info("Creating volume %s for instance %s",
7290                  device.iv_name, instance.name)
7291     #HARDCODE
7292     for node in all_nodes:
7293       f_create = node == pnode
7294       _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7295
7296
7297 def _RemoveDisks(lu, instance, target_node=None):
7298   """Remove all disks for an instance.
7299
7300   This abstracts away some work from `AddInstance()` and
7301   `RemoveInstance()`. Note that in case some of the devices couldn't
7302   be removed, the removal will continue with the other ones (compare
7303   with `_CreateDisks()`).
7304
7305   @type lu: L{LogicalUnit}
7306   @param lu: the logical unit on whose behalf we execute
7307   @type instance: L{objects.Instance}
7308   @param instance: the instance whose disks we should remove
7309   @type target_node: string
7310   @param target_node: used to override the node on which to remove the disks
7311   @rtype: boolean
7312   @return: the success of the removal
7313
7314   """
7315   logging.info("Removing block devices for instance %s", instance.name)
7316
7317   all_result = True
7318   for device in instance.disks:
7319     if target_node:
7320       edata = [(target_node, device)]
7321     else:
7322       edata = device.ComputeNodeTree(instance.primary_node)
7323     for node, disk in edata:
7324       lu.cfg.SetDiskID(disk, node)
7325       msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7326       if msg:
7327         lu.LogWarning("Could not remove block device %s on node %s,"
7328                       " continuing anyway: %s", device.iv_name, node, msg)
7329         all_result = False
7330
7331   if instance.disk_template == constants.DT_FILE:
7332     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7333     if target_node:
7334       tgt = target_node
7335     else:
7336       tgt = instance.primary_node
7337     result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7338     if result.fail_msg:
7339       lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7340                     file_storage_dir, instance.primary_node, result.fail_msg)
7341       all_result = False
7342
7343   return all_result
7344
7345
7346 def _ComputeDiskSizePerVG(disk_template, disks):
7347   """Compute disk size requirements in the volume group
7348
7349   """
7350   def _compute(disks, payload):
7351     """Universal algorithm.
7352
7353     """
7354     vgs = {}
7355     for disk in disks:
7356       vgs[disk[constants.IDISK_VG]] = \
7357         vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
7358
7359     return vgs
7360
7361   # Required free disk space as a function of disk and swap space
7362   req_size_dict = {
7363     constants.DT_DISKLESS: {},
7364     constants.DT_PLAIN: _compute(disks, 0),
7365     # 128 MB are added for drbd metadata for each disk
7366     constants.DT_DRBD8: _compute(disks, 128),
7367     constants.DT_FILE: {},
7368     constants.DT_SHARED_FILE: {},
7369   }
7370
7371   if disk_template not in req_size_dict:
7372     raise errors.ProgrammerError("Disk template '%s' size requirement"
7373                                  " is unknown" %  disk_template)
7374
7375   return req_size_dict[disk_template]
7376
7377
7378 def _ComputeDiskSize(disk_template, disks):
7379   """Compute disk size requirements in the volume group
7380
7381   """
7382   # Required free disk space as a function of disk and swap space
7383   req_size_dict = {
7384     constants.DT_DISKLESS: None,
7385     constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
7386     # 128 MB are added for drbd metadata for each disk
7387     constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
7388     constants.DT_FILE: None,
7389     constants.DT_SHARED_FILE: 0,
7390     constants.DT_BLOCK: 0,
7391   }
7392
7393   if disk_template not in req_size_dict:
7394     raise errors.ProgrammerError("Disk template '%s' size requirement"
7395                                  " is unknown" %  disk_template)
7396
7397   return req_size_dict[disk_template]
7398
7399
7400 def _FilterVmNodes(lu, nodenames):
7401   """Filters out non-vm_capable nodes from a list.
7402
7403   @type lu: L{LogicalUnit}
7404   @param lu: the logical unit for which we check
7405   @type nodenames: list
7406   @param nodenames: the list of nodes on which we should check
7407   @rtype: list
7408   @return: the list of vm-capable nodes
7409
7410   """
7411   vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
7412   return [name for name in nodenames if name not in vm_nodes]
7413
7414
7415 def _CheckHVParams(lu, nodenames, hvname, hvparams):
7416   """Hypervisor parameter validation.
7417
7418   This function abstract the hypervisor parameter validation to be
7419   used in both instance create and instance modify.
7420
7421   @type lu: L{LogicalUnit}
7422   @param lu: the logical unit for which we check
7423   @type nodenames: list
7424   @param nodenames: the list of nodes on which we should check
7425   @type hvname: string
7426   @param hvname: the name of the hypervisor we should use
7427   @type hvparams: dict
7428   @param hvparams: the parameters which we need to check
7429   @raise errors.OpPrereqError: if the parameters are not valid
7430
7431   """
7432   nodenames = _FilterVmNodes(lu, nodenames)
7433   hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
7434                                                   hvname,
7435                                                   hvparams)
7436   for node in nodenames:
7437     info = hvinfo[node]
7438     if info.offline:
7439       continue
7440     info.Raise("Hypervisor parameter validation failed on node %s" % node)
7441
7442
7443 def _CheckOSParams(lu, required, nodenames, osname, osparams):
7444   """OS parameters validation.
7445
7446   @type lu: L{LogicalUnit}
7447   @param lu: the logical unit for which we check
7448   @type required: boolean
7449   @param required: whether the validation should fail if the OS is not
7450       found
7451   @type nodenames: list
7452   @param nodenames: the list of nodes on which we should check
7453   @type osname: string
7454   @param osname: the name of the hypervisor we should use
7455   @type osparams: dict
7456   @param osparams: the parameters which we need to check
7457   @raise errors.OpPrereqError: if the parameters are not valid
7458
7459   """
7460   nodenames = _FilterVmNodes(lu, nodenames)
7461   result = lu.rpc.call_os_validate(required, nodenames, osname,
7462                                    [constants.OS_VALIDATE_PARAMETERS],
7463                                    osparams)
7464   for node, nres in result.items():
7465     # we don't check for offline cases since this should be run only
7466     # against the master node and/or an instance's nodes
7467     nres.Raise("OS Parameters validation failed on node %s" % node)
7468     if not nres.payload:
7469       lu.LogInfo("OS %s not found on node %s, validation skipped",
7470                  osname, node)
7471
7472
7473 class LUInstanceCreate(LogicalUnit):
7474   """Create an instance.
7475
7476   """
7477   HPATH = "instance-add"
7478   HTYPE = constants.HTYPE_INSTANCE
7479   REQ_BGL = False
7480
7481   def CheckArguments(self):
7482     """Check arguments.
7483
7484     """
7485     # do not require name_check to ease forward/backward compatibility
7486     # for tools
7487     if self.op.no_install and self.op.start:
7488       self.LogInfo("No-installation mode selected, disabling startup")
7489       self.op.start = False
7490     # validate/normalize the instance name
7491     self.op.instance_name = \
7492       netutils.Hostname.GetNormalizedName(self.op.instance_name)
7493
7494     if self.op.ip_check and not self.op.name_check:
7495       # TODO: make the ip check more flexible and not depend on the name check
7496       raise errors.OpPrereqError("Cannot do ip check without a name check",
7497                                  errors.ECODE_INVAL)
7498
7499     # check nics' parameter names
7500     for nic in self.op.nics:
7501       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
7502
7503     # check disks. parameter names and consistent adopt/no-adopt strategy
7504     has_adopt = has_no_adopt = False
7505     for disk in self.op.disks:
7506       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
7507       if constants.IDISK_ADOPT in disk:
7508         has_adopt = True
7509       else:
7510         has_no_adopt = True
7511     if has_adopt and has_no_adopt:
7512       raise errors.OpPrereqError("Either all disks are adopted or none is",
7513                                  errors.ECODE_INVAL)
7514     if has_adopt:
7515       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
7516         raise errors.OpPrereqError("Disk adoption is not supported for the"
7517                                    " '%s' disk template" %
7518                                    self.op.disk_template,
7519                                    errors.ECODE_INVAL)
7520       if self.op.iallocator is not None:
7521         raise errors.OpPrereqError("Disk adoption not allowed with an"
7522                                    " iallocator script", errors.ECODE_INVAL)
7523       if self.op.mode == constants.INSTANCE_IMPORT:
7524         raise errors.OpPrereqError("Disk adoption not allowed for"
7525                                    " instance import", errors.ECODE_INVAL)
7526     else:
7527       if self.op.disk_template in constants.DTS_MUST_ADOPT:
7528         raise errors.OpPrereqError("Disk template %s requires disk adoption,"
7529                                    " but no 'adopt' parameter given" %
7530                                    self.op.disk_template,
7531                                    errors.ECODE_INVAL)
7532
7533     self.adopt_disks = has_adopt
7534
7535     # instance name verification
7536     if self.op.name_check:
7537       self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
7538       self.op.instance_name = self.hostname1.name
7539       # used in CheckPrereq for ip ping check
7540       self.check_ip = self.hostname1.ip
7541     else:
7542       self.check_ip = None
7543
7544     # file storage checks
7545     if (self.op.file_driver and
7546         not self.op.file_driver in constants.FILE_DRIVER):
7547       raise errors.OpPrereqError("Invalid file driver name '%s'" %
7548                                  self.op.file_driver, errors.ECODE_INVAL)
7549
7550     if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
7551       raise errors.OpPrereqError("File storage directory path not absolute",
7552                                  errors.ECODE_INVAL)
7553
7554     ### Node/iallocator related checks
7555     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
7556
7557     if self.op.pnode is not None:
7558       if self.op.disk_template in constants.DTS_INT_MIRROR:
7559         if self.op.snode is None:
7560           raise errors.OpPrereqError("The networked disk templates need"
7561                                      " a mirror node", errors.ECODE_INVAL)
7562       elif self.op.snode:
7563         self.LogWarning("Secondary node will be ignored on non-mirrored disk"
7564                         " template")
7565         self.op.snode = None
7566
7567     self._cds = _GetClusterDomainSecret()
7568
7569     if self.op.mode == constants.INSTANCE_IMPORT:
7570       # On import force_variant must be True, because if we forced it at
7571       # initial install, our only chance when importing it back is that it
7572       # works again!
7573       self.op.force_variant = True
7574
7575       if self.op.no_install:
7576         self.LogInfo("No-installation mode has no effect during import")
7577
7578     elif self.op.mode == constants.INSTANCE_CREATE:
7579       if self.op.os_type is None:
7580         raise errors.OpPrereqError("No guest OS specified",
7581                                    errors.ECODE_INVAL)
7582       if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7583         raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7584                                    " installation" % self.op.os_type,
7585                                    errors.ECODE_STATE)
7586       if self.op.disk_template is None:
7587         raise errors.OpPrereqError("No disk template specified",
7588                                    errors.ECODE_INVAL)
7589
7590     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7591       # Check handshake to ensure both clusters have the same domain secret
7592       src_handshake = self.op.source_handshake
7593       if not src_handshake:
7594         raise errors.OpPrereqError("Missing source handshake",
7595                                    errors.ECODE_INVAL)
7596
7597       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7598                                                            src_handshake)
7599       if errmsg:
7600         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7601                                    errors.ECODE_INVAL)
7602
7603       # Load and check source CA
7604       self.source_x509_ca_pem = self.op.source_x509_ca
7605       if not self.source_x509_ca_pem:
7606         raise errors.OpPrereqError("Missing source X509 CA",
7607                                    errors.ECODE_INVAL)
7608
7609       try:
7610         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7611                                                     self._cds)
7612       except OpenSSL.crypto.Error, err:
7613         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7614                                    (err, ), errors.ECODE_INVAL)
7615
7616       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7617       if errcode is not None:
7618         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7619                                    errors.ECODE_INVAL)
7620
7621       self.source_x509_ca = cert
7622
7623       src_instance_name = self.op.source_instance_name
7624       if not src_instance_name:
7625         raise errors.OpPrereqError("Missing source instance name",
7626                                    errors.ECODE_INVAL)
7627
7628       self.source_instance_name = \
7629           netutils.GetHostname(name=src_instance_name).name
7630
7631     else:
7632       raise errors.OpPrereqError("Invalid instance creation mode %r" %
7633                                  self.op.mode, errors.ECODE_INVAL)
7634
7635   def ExpandNames(self):
7636     """ExpandNames for CreateInstance.
7637
7638     Figure out the right locks for instance creation.
7639
7640     """
7641     self.needed_locks = {}
7642
7643     instance_name = self.op.instance_name
7644     # this is just a preventive check, but someone might still add this
7645     # instance in the meantime, and creation will fail at lock-add time
7646     if instance_name in self.cfg.GetInstanceList():
7647       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7648                                  instance_name, errors.ECODE_EXISTS)
7649
7650     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7651
7652     if self.op.iallocator:
7653       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7654     else:
7655       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7656       nodelist = [self.op.pnode]
7657       if self.op.snode is not None:
7658         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7659         nodelist.append(self.op.snode)
7660       self.needed_locks[locking.LEVEL_NODE] = nodelist
7661
7662     # in case of import lock the source node too
7663     if self.op.mode == constants.INSTANCE_IMPORT:
7664       src_node = self.op.src_node
7665       src_path = self.op.src_path
7666
7667       if src_path is None:
7668         self.op.src_path = src_path = self.op.instance_name
7669
7670       if src_node is None:
7671         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7672         self.op.src_node = None
7673         if os.path.isabs(src_path):
7674           raise errors.OpPrereqError("Importing an instance from an absolute"
7675                                      " path requires a source node option.",
7676                                      errors.ECODE_INVAL)
7677       else:
7678         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7679         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7680           self.needed_locks[locking.LEVEL_NODE].append(src_node)
7681         if not os.path.isabs(src_path):
7682           self.op.src_path = src_path = \
7683             utils.PathJoin(constants.EXPORT_DIR, src_path)
7684
7685   def _RunAllocator(self):
7686     """Run the allocator based on input opcode.
7687
7688     """
7689     nics = [n.ToDict() for n in self.nics]
7690     ial = IAllocator(self.cfg, self.rpc,
7691                      mode=constants.IALLOCATOR_MODE_ALLOC,
7692                      name=self.op.instance_name,
7693                      disk_template=self.op.disk_template,
7694                      tags=[],
7695                      os=self.op.os_type,
7696                      vcpus=self.be_full[constants.BE_VCPUS],
7697                      mem_size=self.be_full[constants.BE_MEMORY],
7698                      disks=self.disks,
7699                      nics=nics,
7700                      hypervisor=self.op.hypervisor,
7701                      )
7702
7703     ial.Run(self.op.iallocator)
7704
7705     if not ial.success:
7706       raise errors.OpPrereqError("Can't compute nodes using"
7707                                  " iallocator '%s': %s" %
7708                                  (self.op.iallocator, ial.info),
7709                                  errors.ECODE_NORES)
7710     if len(ial.result) != ial.required_nodes:
7711       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7712                                  " of nodes (%s), required %s" %
7713                                  (self.op.iallocator, len(ial.result),
7714                                   ial.required_nodes), errors.ECODE_FAULT)
7715     self.op.pnode = ial.result[0]
7716     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7717                  self.op.instance_name, self.op.iallocator,
7718                  utils.CommaJoin(ial.result))
7719     if ial.required_nodes == 2:
7720       self.op.snode = ial.result[1]
7721
7722   def BuildHooksEnv(self):
7723     """Build hooks env.
7724
7725     This runs on master, primary and secondary nodes of the instance.
7726
7727     """
7728     env = {
7729       "ADD_MODE": self.op.mode,
7730       }
7731     if self.op.mode == constants.INSTANCE_IMPORT:
7732       env["SRC_NODE"] = self.op.src_node
7733       env["SRC_PATH"] = self.op.src_path
7734       env["SRC_IMAGES"] = self.src_images
7735
7736     env.update(_BuildInstanceHookEnv(
7737       name=self.op.instance_name,
7738       primary_node=self.op.pnode,
7739       secondary_nodes=self.secondaries,
7740       status=self.op.start,
7741       os_type=self.op.os_type,
7742       memory=self.be_full[constants.BE_MEMORY],
7743       vcpus=self.be_full[constants.BE_VCPUS],
7744       nics=_NICListToTuple(self, self.nics),
7745       disk_template=self.op.disk_template,
7746       disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
7747              for d in self.disks],
7748       bep=self.be_full,
7749       hvp=self.hv_full,
7750       hypervisor_name=self.op.hypervisor,
7751     ))
7752
7753     return env
7754
7755   def BuildHooksNodes(self):
7756     """Build hooks nodes.
7757
7758     """
7759     nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
7760     return nl, nl
7761
7762   def _ReadExportInfo(self):
7763     """Reads the export information from disk.
7764
7765     It will override the opcode source node and path with the actual
7766     information, if these two were not specified before.
7767
7768     @return: the export information
7769
7770     """
7771     assert self.op.mode == constants.INSTANCE_IMPORT
7772
7773     src_node = self.op.src_node
7774     src_path = self.op.src_path
7775
7776     if src_node is None:
7777       locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7778       exp_list = self.rpc.call_export_list(locked_nodes)
7779       found = False
7780       for node in exp_list:
7781         if exp_list[node].fail_msg:
7782           continue
7783         if src_path in exp_list[node].payload:
7784           found = True
7785           self.op.src_node = src_node = node
7786           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7787                                                        src_path)
7788           break
7789       if not found:
7790         raise errors.OpPrereqError("No export found for relative path %s" %
7791                                     src_path, errors.ECODE_INVAL)
7792
7793     _CheckNodeOnline(self, src_node)
7794     result = self.rpc.call_export_info(src_node, src_path)
7795     result.Raise("No export or invalid export found in dir %s" % src_path)
7796
7797     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7798     if not export_info.has_section(constants.INISECT_EXP):
7799       raise errors.ProgrammerError("Corrupted export config",
7800                                    errors.ECODE_ENVIRON)
7801
7802     ei_version = export_info.get(constants.INISECT_EXP, "version")
7803     if (int(ei_version) != constants.EXPORT_VERSION):
7804       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7805                                  (ei_version, constants.EXPORT_VERSION),
7806                                  errors.ECODE_ENVIRON)
7807     return export_info
7808
7809   def _ReadExportParams(self, einfo):
7810     """Use export parameters as defaults.
7811
7812     In case the opcode doesn't specify (as in override) some instance
7813     parameters, then try to use them from the export information, if
7814     that declares them.
7815
7816     """
7817     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7818
7819     if self.op.disk_template is None:
7820       if einfo.has_option(constants.INISECT_INS, "disk_template"):
7821         self.op.disk_template = einfo.get(constants.INISECT_INS,
7822                                           "disk_template")
7823       else:
7824         raise errors.OpPrereqError("No disk template specified and the export"
7825                                    " is missing the disk_template information",
7826                                    errors.ECODE_INVAL)
7827
7828     if not self.op.disks:
7829       if einfo.has_option(constants.INISECT_INS, "disk_count"):
7830         disks = []
7831         # TODO: import the disk iv_name too
7832         for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7833           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7834           disks.append({constants.IDISK_SIZE: disk_sz})
7835         self.op.disks = disks
7836       else:
7837         raise errors.OpPrereqError("No disk info specified and the export"
7838                                    " is missing the disk information",
7839                                    errors.ECODE_INVAL)
7840
7841     if (not self.op.nics and
7842         einfo.has_option(constants.INISECT_INS, "nic_count")):
7843       nics = []
7844       for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7845         ndict = {}
7846         for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7847           v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7848           ndict[name] = v
7849         nics.append(ndict)
7850       self.op.nics = nics
7851
7852     if (self.op.hypervisor is None and
7853         einfo.has_option(constants.INISECT_INS, "hypervisor")):
7854       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7855     if einfo.has_section(constants.INISECT_HYP):
7856       # use the export parameters but do not override the ones
7857       # specified by the user
7858       for name, value in einfo.items(constants.INISECT_HYP):
7859         if name not in self.op.hvparams:
7860           self.op.hvparams[name] = value
7861
7862     if einfo.has_section(constants.INISECT_BEP):
7863       # use the parameters, without overriding
7864       for name, value in einfo.items(constants.INISECT_BEP):
7865         if name not in self.op.beparams:
7866           self.op.beparams[name] = value
7867     else:
7868       # try to read the parameters old style, from the main section
7869       for name in constants.BES_PARAMETERS:
7870         if (name not in self.op.beparams and
7871             einfo.has_option(constants.INISECT_INS, name)):
7872           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7873
7874     if einfo.has_section(constants.INISECT_OSP):
7875       # use the parameters, without overriding
7876       for name, value in einfo.items(constants.INISECT_OSP):
7877         if name not in self.op.osparams:
7878           self.op.osparams[name] = value
7879
7880   def _RevertToDefaults(self, cluster):
7881     """Revert the instance parameters to the default values.
7882
7883     """
7884     # hvparams
7885     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7886     for name in self.op.hvparams.keys():
7887       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7888         del self.op.hvparams[name]
7889     # beparams
7890     be_defs = cluster.SimpleFillBE({})
7891     for name in self.op.beparams.keys():
7892       if name in be_defs and be_defs[name] == self.op.beparams[name]:
7893         del self.op.beparams[name]
7894     # nic params
7895     nic_defs = cluster.SimpleFillNIC({})
7896     for nic in self.op.nics:
7897       for name in constants.NICS_PARAMETERS:
7898         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7899           del nic[name]
7900     # osparams
7901     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7902     for name in self.op.osparams.keys():
7903       if name in os_defs and os_defs[name] == self.op.osparams[name]:
7904         del self.op.osparams[name]
7905
7906   def CheckPrereq(self):
7907     """Check prerequisites.
7908
7909     """
7910     if self.op.mode == constants.INSTANCE_IMPORT:
7911       export_info = self._ReadExportInfo()
7912       self._ReadExportParams(export_info)
7913
7914     if (not self.cfg.GetVGName() and
7915         self.op.disk_template not in constants.DTS_NOT_LVM):
7916       raise errors.OpPrereqError("Cluster does not support lvm-based"
7917                                  " instances", errors.ECODE_STATE)
7918
7919     if self.op.hypervisor is None:
7920       self.op.hypervisor = self.cfg.GetHypervisorType()
7921
7922     cluster = self.cfg.GetClusterInfo()
7923     enabled_hvs = cluster.enabled_hypervisors
7924     if self.op.hypervisor not in enabled_hvs:
7925       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7926                                  " cluster (%s)" % (self.op.hypervisor,
7927                                   ",".join(enabled_hvs)),
7928                                  errors.ECODE_STATE)
7929
7930     # check hypervisor parameter syntax (locally)
7931     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7932     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7933                                       self.op.hvparams)
7934     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7935     hv_type.CheckParameterSyntax(filled_hvp)
7936     self.hv_full = filled_hvp
7937     # check that we don't specify global parameters on an instance
7938     _CheckGlobalHvParams(self.op.hvparams)
7939
7940     # fill and remember the beparams dict
7941     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7942     self.be_full = cluster.SimpleFillBE(self.op.beparams)
7943
7944     # build os parameters
7945     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7946
7947     # now that hvp/bep are in final format, let's reset to defaults,
7948     # if told to do so
7949     if self.op.identify_defaults:
7950       self._RevertToDefaults(cluster)
7951
7952     # NIC buildup
7953     self.nics = []
7954     for idx, nic in enumerate(self.op.nics):
7955       nic_mode_req = nic.get(constants.INIC_MODE, None)
7956       nic_mode = nic_mode_req
7957       if nic_mode is None:
7958         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7959
7960       # in routed mode, for the first nic, the default ip is 'auto'
7961       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7962         default_ip_mode = constants.VALUE_AUTO
7963       else:
7964         default_ip_mode = constants.VALUE_NONE
7965
7966       # ip validity checks
7967       ip = nic.get(constants.INIC_IP, default_ip_mode)
7968       if ip is None or ip.lower() == constants.VALUE_NONE:
7969         nic_ip = None
7970       elif ip.lower() == constants.VALUE_AUTO:
7971         if not self.op.name_check:
7972           raise errors.OpPrereqError("IP address set to auto but name checks"
7973                                      " have been skipped",
7974                                      errors.ECODE_INVAL)
7975         nic_ip = self.hostname1.ip
7976       else:
7977         if not netutils.IPAddress.IsValid(ip):
7978           raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7979                                      errors.ECODE_INVAL)
7980         nic_ip = ip
7981
7982       # TODO: check the ip address for uniqueness
7983       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7984         raise errors.OpPrereqError("Routed nic mode requires an ip address",
7985                                    errors.ECODE_INVAL)
7986
7987       # MAC address verification
7988       mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
7989       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7990         mac = utils.NormalizeAndValidateMac(mac)
7991
7992         try:
7993           self.cfg.ReserveMAC(mac, self.proc.GetECId())
7994         except errors.ReservationError:
7995           raise errors.OpPrereqError("MAC address %s already in use"
7996                                      " in cluster" % mac,
7997                                      errors.ECODE_NOTUNIQUE)
7998
7999       #  Build nic parameters
8000       link = nic.get(constants.INIC_LINK, None)
8001       nicparams = {}
8002       if nic_mode_req:
8003         nicparams[constants.NIC_MODE] = nic_mode_req
8004       if link:
8005         nicparams[constants.NIC_LINK] = link
8006
8007       check_params = cluster.SimpleFillNIC(nicparams)
8008       objects.NIC.CheckParameterSyntax(check_params)
8009       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8010
8011     # disk checks/pre-build
8012     default_vg = self.cfg.GetVGName()
8013     self.disks = []
8014     for disk in self.op.disks:
8015       mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8016       if mode not in constants.DISK_ACCESS_SET:
8017         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8018                                    mode, errors.ECODE_INVAL)
8019       size = disk.get(constants.IDISK_SIZE, None)
8020       if size is None:
8021         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8022       try:
8023         size = int(size)
8024       except (TypeError, ValueError):
8025         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8026                                    errors.ECODE_INVAL)
8027
8028       data_vg = disk.get(constants.IDISK_VG, default_vg)
8029       new_disk = {
8030         constants.IDISK_SIZE: size,
8031         constants.IDISK_MODE: mode,
8032         constants.IDISK_VG: data_vg,
8033         constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8034         }
8035       if constants.IDISK_ADOPT in disk:
8036         new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8037       self.disks.append(new_disk)
8038
8039     if self.op.mode == constants.INSTANCE_IMPORT:
8040
8041       # Check that the new instance doesn't have less disks than the export
8042       instance_disks = len(self.disks)
8043       export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
8044       if instance_disks < export_disks:
8045         raise errors.OpPrereqError("Not enough disks to import."
8046                                    " (instance: %d, export: %d)" %
8047                                    (instance_disks, export_disks),
8048                                    errors.ECODE_INVAL)
8049
8050       disk_images = []
8051       for idx in range(export_disks):
8052         option = 'disk%d_dump' % idx
8053         if export_info.has_option(constants.INISECT_INS, option):
8054           # FIXME: are the old os-es, disk sizes, etc. useful?
8055           export_name = export_info.get(constants.INISECT_INS, option)
8056           image = utils.PathJoin(self.op.src_path, export_name)
8057           disk_images.append(image)
8058         else:
8059           disk_images.append(False)
8060
8061       self.src_images = disk_images
8062
8063       old_name = export_info.get(constants.INISECT_INS, 'name')
8064       try:
8065         exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
8066       except (TypeError, ValueError), err:
8067         raise errors.OpPrereqError("Invalid export file, nic_count is not"
8068                                    " an integer: %s" % str(err),
8069                                    errors.ECODE_STATE)
8070       if self.op.instance_name == old_name:
8071         for idx, nic in enumerate(self.nics):
8072           if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8073             nic_mac_ini = 'nic%d_mac' % idx
8074             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8075
8076     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8077
8078     # ip ping checks (we use the same ip that was resolved in ExpandNames)
8079     if self.op.ip_check:
8080       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8081         raise errors.OpPrereqError("IP %s of instance %s already in use" %
8082                                    (self.check_ip, self.op.instance_name),
8083                                    errors.ECODE_NOTUNIQUE)
8084
8085     #### mac address generation
8086     # By generating here the mac address both the allocator and the hooks get
8087     # the real final mac address rather than the 'auto' or 'generate' value.
8088     # There is a race condition between the generation and the instance object
8089     # creation, which means that we know the mac is valid now, but we're not
8090     # sure it will be when we actually add the instance. If things go bad
8091     # adding the instance will abort because of a duplicate mac, and the
8092     # creation job will fail.
8093     for nic in self.nics:
8094       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8095         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8096
8097     #### allocator run
8098
8099     if self.op.iallocator is not None:
8100       self._RunAllocator()
8101
8102     #### node related checks
8103
8104     # check primary node
8105     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8106     assert self.pnode is not None, \
8107       "Cannot retrieve locked node %s" % self.op.pnode
8108     if pnode.offline:
8109       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8110                                  pnode.name, errors.ECODE_STATE)
8111     if pnode.drained:
8112       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8113                                  pnode.name, errors.ECODE_STATE)
8114     if not pnode.vm_capable:
8115       raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8116                                  " '%s'" % pnode.name, errors.ECODE_STATE)
8117
8118     self.secondaries = []
8119
8120     # mirror node verification
8121     if self.op.disk_template in constants.DTS_INT_MIRROR:
8122       if self.op.snode == pnode.name:
8123         raise errors.OpPrereqError("The secondary node cannot be the"
8124                                    " primary node.", errors.ECODE_INVAL)
8125       _CheckNodeOnline(self, self.op.snode)
8126       _CheckNodeNotDrained(self, self.op.snode)
8127       _CheckNodeVmCapable(self, self.op.snode)
8128       self.secondaries.append(self.op.snode)
8129
8130     nodenames = [pnode.name] + self.secondaries
8131
8132     if not self.adopt_disks:
8133       # Check lv size requirements, if not adopting
8134       req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8135       _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8136
8137     elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8138       all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8139                                 disk[constants.IDISK_ADOPT])
8140                      for disk in self.disks])
8141       if len(all_lvs) != len(self.disks):
8142         raise errors.OpPrereqError("Duplicate volume names given for adoption",
8143                                    errors.ECODE_INVAL)
8144       for lv_name in all_lvs:
8145         try:
8146           # FIXME: lv_name here is "vg/lv" need to ensure that other calls
8147           # to ReserveLV uses the same syntax
8148           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8149         except errors.ReservationError:
8150           raise errors.OpPrereqError("LV named %s used by another instance" %
8151                                      lv_name, errors.ECODE_NOTUNIQUE)
8152
8153       vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8154       vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8155
8156       node_lvs = self.rpc.call_lv_list([pnode.name],
8157                                        vg_names.payload.keys())[pnode.name]
8158       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8159       node_lvs = node_lvs.payload
8160
8161       delta = all_lvs.difference(node_lvs.keys())
8162       if delta:
8163         raise errors.OpPrereqError("Missing logical volume(s): %s" %
8164                                    utils.CommaJoin(delta),
8165                                    errors.ECODE_INVAL)
8166       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8167       if online_lvs:
8168         raise errors.OpPrereqError("Online logical volumes found, cannot"
8169                                    " adopt: %s" % utils.CommaJoin(online_lvs),
8170                                    errors.ECODE_STATE)
8171       # update the size of disk based on what is found
8172       for dsk in self.disks:
8173         dsk[constants.IDISK_SIZE] = \
8174           int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8175                                         dsk[constants.IDISK_ADOPT])][0]))
8176
8177     elif self.op.disk_template == constants.DT_BLOCK:
8178       # Normalize and de-duplicate device paths
8179       all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8180                        for disk in self.disks])
8181       if len(all_disks) != len(self.disks):
8182         raise errors.OpPrereqError("Duplicate disk names given for adoption",
8183                                    errors.ECODE_INVAL)
8184       baddisks = [d for d in all_disks
8185                   if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8186       if baddisks:
8187         raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8188                                    " cannot be adopted" %
8189                                    (", ".join(baddisks),
8190                                     constants.ADOPTABLE_BLOCKDEV_ROOT),
8191                                    errors.ECODE_INVAL)
8192
8193       node_disks = self.rpc.call_bdev_sizes([pnode.name],
8194                                             list(all_disks))[pnode.name]
8195       node_disks.Raise("Cannot get block device information from node %s" %
8196                        pnode.name)
8197       node_disks = node_disks.payload
8198       delta = all_disks.difference(node_disks.keys())
8199       if delta:
8200         raise errors.OpPrereqError("Missing block device(s): %s" %
8201                                    utils.CommaJoin(delta),
8202                                    errors.ECODE_INVAL)
8203       for dsk in self.disks:
8204         dsk[constants.IDISK_SIZE] = \
8205           int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8206
8207     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8208
8209     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8210     # check OS parameters (remotely)
8211     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8212
8213     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8214
8215     # memory check on primary node
8216     if self.op.start:
8217       _CheckNodeFreeMemory(self, self.pnode.name,
8218                            "creating instance %s" % self.op.instance_name,
8219                            self.be_full[constants.BE_MEMORY],
8220                            self.op.hypervisor)
8221
8222     self.dry_run_result = list(nodenames)
8223
8224   def Exec(self, feedback_fn):
8225     """Create and add the instance to the cluster.
8226
8227     """
8228     instance = self.op.instance_name
8229     pnode_name = self.pnode.name
8230
8231     ht_kind = self.op.hypervisor
8232     if ht_kind in constants.HTS_REQ_PORT:
8233       network_port = self.cfg.AllocatePort()
8234     else:
8235       network_port = None
8236
8237     if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
8238       # this is needed because os.path.join does not accept None arguments
8239       if self.op.file_storage_dir is None:
8240         string_file_storage_dir = ""
8241       else:
8242         string_file_storage_dir = self.op.file_storage_dir
8243
8244       # build the full file storage dir path
8245       if self.op.disk_template == constants.DT_SHARED_FILE:
8246         get_fsd_fn = self.cfg.GetSharedFileStorageDir
8247       else:
8248         get_fsd_fn = self.cfg.GetFileStorageDir
8249
8250       file_storage_dir = utils.PathJoin(get_fsd_fn(),
8251                                         string_file_storage_dir, instance)
8252     else:
8253       file_storage_dir = ""
8254
8255     disks = _GenerateDiskTemplate(self,
8256                                   self.op.disk_template,
8257                                   instance, pnode_name,
8258                                   self.secondaries,
8259                                   self.disks,
8260                                   file_storage_dir,
8261                                   self.op.file_driver,
8262                                   0,
8263                                   feedback_fn)
8264
8265     iobj = objects.Instance(name=instance, os=self.op.os_type,
8266                             primary_node=pnode_name,
8267                             nics=self.nics, disks=disks,
8268                             disk_template=self.op.disk_template,
8269                             admin_up=False,
8270                             network_port=network_port,
8271                             beparams=self.op.beparams,
8272                             hvparams=self.op.hvparams,
8273                             hypervisor=self.op.hypervisor,
8274                             osparams=self.op.osparams,
8275                             )
8276
8277     if self.adopt_disks:
8278       if self.op.disk_template == constants.DT_PLAIN:
8279         # rename LVs to the newly-generated names; we need to construct
8280         # 'fake' LV disks with the old data, plus the new unique_id
8281         tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8282         rename_to = []
8283         for t_dsk, a_dsk in zip (tmp_disks, self.disks):
8284           rename_to.append(t_dsk.logical_id)
8285           t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8286           self.cfg.SetDiskID(t_dsk, pnode_name)
8287         result = self.rpc.call_blockdev_rename(pnode_name,
8288                                                zip(tmp_disks, rename_to))
8289         result.Raise("Failed to rename adoped LVs")
8290     else:
8291       feedback_fn("* creating instance disks...")
8292       try:
8293         _CreateDisks(self, iobj)
8294       except errors.OpExecError:
8295         self.LogWarning("Device creation failed, reverting...")
8296         try:
8297           _RemoveDisks(self, iobj)
8298         finally:
8299           self.cfg.ReleaseDRBDMinors(instance)
8300           raise
8301
8302     feedback_fn("adding instance %s to cluster config" % instance)
8303
8304     self.cfg.AddInstance(iobj, self.proc.GetECId())
8305
8306     # Declare that we don't want to remove the instance lock anymore, as we've
8307     # added the instance to the config
8308     del self.remove_locks[locking.LEVEL_INSTANCE]
8309
8310     if self.op.mode == constants.INSTANCE_IMPORT:
8311       # Release unused nodes
8312       _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
8313     else:
8314       # Release all nodes
8315       _ReleaseLocks(self, locking.LEVEL_NODE)
8316
8317     disk_abort = False
8318     if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
8319       feedback_fn("* wiping instance disks...")
8320       try:
8321         _WipeDisks(self, iobj)
8322       except errors.OpExecError, err:
8323         logging.exception("Wiping disks failed")
8324         self.LogWarning("Wiping instance disks failed (%s)", err)
8325         disk_abort = True
8326
8327     if disk_abort:
8328       # Something is already wrong with the disks, don't do anything else
8329       pass
8330     elif self.op.wait_for_sync:
8331       disk_abort = not _WaitForSync(self, iobj)
8332     elif iobj.disk_template in constants.DTS_INT_MIRROR:
8333       # make sure the disks are not degraded (still sync-ing is ok)
8334       time.sleep(15)
8335       feedback_fn("* checking mirrors status")
8336       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8337     else:
8338       disk_abort = False
8339
8340     if disk_abort:
8341       _RemoveDisks(self, iobj)
8342       self.cfg.RemoveInstance(iobj.name)
8343       # Make sure the instance lock gets removed
8344       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8345       raise errors.OpExecError("There are some degraded disks for"
8346                                " this instance")
8347
8348     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
8349       if self.op.mode == constants.INSTANCE_CREATE:
8350         if not self.op.no_install:
8351           feedback_fn("* running the instance OS create scripts...")
8352           # FIXME: pass debug option from opcode to backend
8353           result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
8354                                                  self.op.debug_level)
8355           result.Raise("Could not add os for instance %s"
8356                        " on node %s" % (instance, pnode_name))
8357
8358       elif self.op.mode == constants.INSTANCE_IMPORT:
8359         feedback_fn("* running the instance OS import scripts...")
8360
8361         transfers = []
8362
8363         for idx, image in enumerate(self.src_images):
8364           if not image:
8365             continue
8366
8367           # FIXME: pass debug option from opcode to backend
8368           dt = masterd.instance.DiskTransfer("disk/%s" % idx,
8369                                              constants.IEIO_FILE, (image, ),
8370                                              constants.IEIO_SCRIPT,
8371                                              (iobj.disks[idx], idx),
8372                                              None)
8373           transfers.append(dt)
8374
8375         import_result = \
8376           masterd.instance.TransferInstanceData(self, feedback_fn,
8377                                                 self.op.src_node, pnode_name,
8378                                                 self.pnode.secondary_ip,
8379                                                 iobj, transfers)
8380         if not compat.all(import_result):
8381           self.LogWarning("Some disks for instance %s on node %s were not"
8382                           " imported successfully" % (instance, pnode_name))
8383
8384       elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8385         feedback_fn("* preparing remote import...")
8386         # The source cluster will stop the instance before attempting to make a
8387         # connection. In some cases stopping an instance can take a long time,
8388         # hence the shutdown timeout is added to the connection timeout.
8389         connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
8390                            self.op.source_shutdown_timeout)
8391         timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
8392
8393         assert iobj.primary_node == self.pnode.name
8394         disk_results = \
8395           masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
8396                                         self.source_x509_ca,
8397                                         self._cds, timeouts)
8398         if not compat.all(disk_results):
8399           # TODO: Should the instance still be started, even if some disks
8400           # failed to import (valid for local imports, too)?
8401           self.LogWarning("Some disks for instance %s on node %s were not"
8402                           " imported successfully" % (instance, pnode_name))
8403
8404         # Run rename script on newly imported instance
8405         assert iobj.name == instance
8406         feedback_fn("Running rename script for %s" % instance)
8407         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
8408                                                    self.source_instance_name,
8409                                                    self.op.debug_level)
8410         if result.fail_msg:
8411           self.LogWarning("Failed to run rename script for %s on node"
8412                           " %s: %s" % (instance, pnode_name, result.fail_msg))
8413
8414       else:
8415         # also checked in the prereq part
8416         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
8417                                      % self.op.mode)
8418
8419     if self.op.start:
8420       iobj.admin_up = True
8421       self.cfg.Update(iobj, feedback_fn)
8422       logging.info("Starting instance %s on node %s", instance, pnode_name)
8423       feedback_fn("* starting instance...")
8424       result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
8425       result.Raise("Could not start instance")
8426
8427     return list(iobj.all_nodes)
8428
8429
8430 class LUInstanceConsole(NoHooksLU):
8431   """Connect to an instance's console.
8432
8433   This is somewhat special in that it returns the command line that
8434   you need to run on the master node in order to connect to the
8435   console.
8436
8437   """
8438   REQ_BGL = False
8439
8440   def ExpandNames(self):
8441     self._ExpandAndLockInstance()
8442
8443   def CheckPrereq(self):
8444     """Check prerequisites.
8445
8446     This checks that the instance is in the cluster.
8447
8448     """
8449     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
8450     assert self.instance is not None, \
8451       "Cannot retrieve locked instance %s" % self.op.instance_name
8452     _CheckNodeOnline(self, self.instance.primary_node)
8453
8454   def Exec(self, feedback_fn):
8455     """Connect to the console of an instance
8456
8457     """
8458     instance = self.instance
8459     node = instance.primary_node
8460
8461     node_insts = self.rpc.call_instance_list([node],
8462                                              [instance.hypervisor])[node]
8463     node_insts.Raise("Can't get node information from %s" % node)
8464
8465     if instance.name not in node_insts.payload:
8466       if instance.admin_up:
8467         state = constants.INSTST_ERRORDOWN
8468       else:
8469         state = constants.INSTST_ADMINDOWN
8470       raise errors.OpExecError("Instance %s is not running (state %s)" %
8471                                (instance.name, state))
8472
8473     logging.debug("Connecting to console of %s on %s", instance.name, node)
8474
8475     return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
8476
8477
8478 def _GetInstanceConsole(cluster, instance):
8479   """Returns console information for an instance.
8480
8481   @type cluster: L{objects.Cluster}
8482   @type instance: L{objects.Instance}
8483   @rtype: dict
8484
8485   """
8486   hyper = hypervisor.GetHypervisor(instance.hypervisor)
8487   # beparams and hvparams are passed separately, to avoid editing the
8488   # instance and then saving the defaults in the instance itself.
8489   hvparams = cluster.FillHV(instance)
8490   beparams = cluster.FillBE(instance)
8491   console = hyper.GetInstanceConsole(instance, hvparams, beparams)
8492
8493   assert console.instance == instance.name
8494   assert console.Validate()
8495
8496   return console.ToDict()
8497
8498
8499 class LUInstanceReplaceDisks(LogicalUnit):
8500   """Replace the disks of an instance.
8501
8502   """
8503   HPATH = "mirrors-replace"
8504   HTYPE = constants.HTYPE_INSTANCE
8505   REQ_BGL = False
8506
8507   def CheckArguments(self):
8508     TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
8509                                   self.op.iallocator)
8510
8511   def ExpandNames(self):
8512     self._ExpandAndLockInstance()
8513
8514     if self.op.iallocator is not None:
8515       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8516
8517     elif self.op.remote_node is not None:
8518       remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
8519       self.op.remote_node = remote_node
8520
8521       # Warning: do not remove the locking of the new secondary here
8522       # unless DRBD8.AddChildren is changed to work in parallel;
8523       # currently it doesn't since parallel invocations of
8524       # FindUnusedMinor will conflict
8525       self.needed_locks[locking.LEVEL_NODE] = [remote_node]
8526       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
8527
8528     else:
8529       self.needed_locks[locking.LEVEL_NODE] = []
8530       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
8531
8532     self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
8533                                    self.op.iallocator, self.op.remote_node,
8534                                    self.op.disks, False, self.op.early_release)
8535
8536     self.tasklets = [self.replacer]
8537
8538   def DeclareLocks(self, level):
8539     # If we're not already locking all nodes in the set we have to declare the
8540     # instance's primary/secondary nodes.
8541     if (level == locking.LEVEL_NODE and
8542         self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET):
8543       self._LockInstancesNodes()
8544
8545   def BuildHooksEnv(self):
8546     """Build hooks env.
8547
8548     This runs on the master, the primary and all the secondaries.
8549
8550     """
8551     instance = self.replacer.instance
8552     env = {
8553       "MODE": self.op.mode,
8554       "NEW_SECONDARY": self.op.remote_node,
8555       "OLD_SECONDARY": instance.secondary_nodes[0],
8556       }
8557     env.update(_BuildInstanceHookEnvByObject(self, instance))
8558     return env
8559
8560   def BuildHooksNodes(self):
8561     """Build hooks nodes.
8562
8563     """
8564     instance = self.replacer.instance
8565     nl = [
8566       self.cfg.GetMasterNode(),
8567       instance.primary_node,
8568       ]
8569     if self.op.remote_node is not None:
8570       nl.append(self.op.remote_node)
8571     return nl, nl
8572
8573
8574 class TLReplaceDisks(Tasklet):
8575   """Replaces disks for an instance.
8576
8577   Note: Locking is not within the scope of this class.
8578
8579   """
8580   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
8581                disks, delay_iallocator, early_release):
8582     """Initializes this class.
8583
8584     """
8585     Tasklet.__init__(self, lu)
8586
8587     # Parameters
8588     self.instance_name = instance_name
8589     self.mode = mode
8590     self.iallocator_name = iallocator_name
8591     self.remote_node = remote_node
8592     self.disks = disks
8593     self.delay_iallocator = delay_iallocator
8594     self.early_release = early_release
8595
8596     # Runtime data
8597     self.instance = None
8598     self.new_node = None
8599     self.target_node = None
8600     self.other_node = None
8601     self.remote_node_info = None
8602     self.node_secondary_ip = None
8603
8604   @staticmethod
8605   def CheckArguments(mode, remote_node, iallocator):
8606     """Helper function for users of this class.
8607
8608     """
8609     # check for valid parameter combination
8610     if mode == constants.REPLACE_DISK_CHG:
8611       if remote_node is None and iallocator is None:
8612         raise errors.OpPrereqError("When changing the secondary either an"
8613                                    " iallocator script must be used or the"
8614                                    " new node given", errors.ECODE_INVAL)
8615
8616       if remote_node is not None and iallocator is not None:
8617         raise errors.OpPrereqError("Give either the iallocator or the new"
8618                                    " secondary, not both", errors.ECODE_INVAL)
8619
8620     elif remote_node is not None or iallocator is not None:
8621       # Not replacing the secondary
8622       raise errors.OpPrereqError("The iallocator and new node options can"
8623                                  " only be used when changing the"
8624                                  " secondary node", errors.ECODE_INVAL)
8625
8626   @staticmethod
8627   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8628     """Compute a new secondary node using an IAllocator.
8629
8630     """
8631     ial = IAllocator(lu.cfg, lu.rpc,
8632                      mode=constants.IALLOCATOR_MODE_RELOC,
8633                      name=instance_name,
8634                      relocate_from=relocate_from)
8635
8636     ial.Run(iallocator_name)
8637
8638     if not ial.success:
8639       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
8640                                  " %s" % (iallocator_name, ial.info),
8641                                  errors.ECODE_NORES)
8642
8643     if len(ial.result) != ial.required_nodes:
8644       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8645                                  " of nodes (%s), required %s" %
8646                                  (iallocator_name,
8647                                   len(ial.result), ial.required_nodes),
8648                                  errors.ECODE_FAULT)
8649
8650     remote_node_name = ial.result[0]
8651
8652     lu.LogInfo("Selected new secondary for instance '%s': %s",
8653                instance_name, remote_node_name)
8654
8655     return remote_node_name
8656
8657   def _FindFaultyDisks(self, node_name):
8658     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
8659                                     node_name, True)
8660
8661   def _CheckDisksActivated(self, instance):
8662     """Checks if the instance disks are activated.
8663
8664     @param instance: The instance to check disks
8665     @return: True if they are activated, False otherwise
8666
8667     """
8668     nodes = instance.all_nodes
8669
8670     for idx, dev in enumerate(instance.disks):
8671       for node in nodes:
8672         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
8673         self.cfg.SetDiskID(dev, node)
8674
8675         result = self.rpc.call_blockdev_find(node, dev)
8676
8677         if result.offline:
8678           continue
8679         elif result.fail_msg or not result.payload:
8680           return False
8681
8682     return True
8683
8684   def CheckPrereq(self):
8685     """Check prerequisites.
8686
8687     This checks that the instance is in the cluster.
8688
8689     """
8690     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
8691     assert instance is not None, \
8692       "Cannot retrieve locked instance %s" % self.instance_name
8693
8694     if instance.disk_template != constants.DT_DRBD8:
8695       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
8696                                  " instances", errors.ECODE_INVAL)
8697
8698     if len(instance.secondary_nodes) != 1:
8699       raise errors.OpPrereqError("The instance has a strange layout,"
8700                                  " expected one secondary but found %d" %
8701                                  len(instance.secondary_nodes),
8702                                  errors.ECODE_FAULT)
8703
8704     if not self.delay_iallocator:
8705       self._CheckPrereq2()
8706
8707   def _CheckPrereq2(self):
8708     """Check prerequisites, second part.
8709
8710     This function should always be part of CheckPrereq. It was separated and is
8711     now called from Exec because during node evacuation iallocator was only
8712     called with an unmodified cluster model, not taking planned changes into
8713     account.
8714
8715     """
8716     instance = self.instance
8717     secondary_node = instance.secondary_nodes[0]
8718
8719     if self.iallocator_name is None:
8720       remote_node = self.remote_node
8721     else:
8722       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
8723                                        instance.name, instance.secondary_nodes)
8724
8725     if remote_node is not None:
8726       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
8727       assert self.remote_node_info is not None, \
8728         "Cannot retrieve locked node %s" % remote_node
8729     else:
8730       self.remote_node_info = None
8731
8732     if remote_node == self.instance.primary_node:
8733       raise errors.OpPrereqError("The specified node is the primary node of"
8734                                  " the instance.", errors.ECODE_INVAL)
8735
8736     if remote_node == secondary_node:
8737       raise errors.OpPrereqError("The specified node is already the"
8738                                  " secondary node of the instance.",
8739                                  errors.ECODE_INVAL)
8740
8741     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
8742                                     constants.REPLACE_DISK_CHG):
8743       raise errors.OpPrereqError("Cannot specify disks to be replaced",
8744                                  errors.ECODE_INVAL)
8745
8746     if self.mode == constants.REPLACE_DISK_AUTO:
8747       if not self._CheckDisksActivated(instance):
8748         raise errors.OpPrereqError("Please run activate-disks on instance %s"
8749                                    " first" % self.instance_name,
8750                                    errors.ECODE_STATE)
8751       faulty_primary = self._FindFaultyDisks(instance.primary_node)
8752       faulty_secondary = self._FindFaultyDisks(secondary_node)
8753
8754       if faulty_primary and faulty_secondary:
8755         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
8756                                    " one node and can not be repaired"
8757                                    " automatically" % self.instance_name,
8758                                    errors.ECODE_STATE)
8759
8760       if faulty_primary:
8761         self.disks = faulty_primary
8762         self.target_node = instance.primary_node
8763         self.other_node = secondary_node
8764         check_nodes = [self.target_node, self.other_node]
8765       elif faulty_secondary:
8766         self.disks = faulty_secondary
8767         self.target_node = secondary_node
8768         self.other_node = instance.primary_node
8769         check_nodes = [self.target_node, self.other_node]
8770       else:
8771         self.disks = []
8772         check_nodes = []
8773
8774     else:
8775       # Non-automatic modes
8776       if self.mode == constants.REPLACE_DISK_PRI:
8777         self.target_node = instance.primary_node
8778         self.other_node = secondary_node
8779         check_nodes = [self.target_node, self.other_node]
8780
8781       elif self.mode == constants.REPLACE_DISK_SEC:
8782         self.target_node = secondary_node
8783         self.other_node = instance.primary_node
8784         check_nodes = [self.target_node, self.other_node]
8785
8786       elif self.mode == constants.REPLACE_DISK_CHG:
8787         self.new_node = remote_node
8788         self.other_node = instance.primary_node
8789         self.target_node = secondary_node
8790         check_nodes = [self.new_node, self.other_node]
8791
8792         _CheckNodeNotDrained(self.lu, remote_node)
8793         _CheckNodeVmCapable(self.lu, remote_node)
8794
8795         old_node_info = self.cfg.GetNodeInfo(secondary_node)
8796         assert old_node_info is not None
8797         if old_node_info.offline and not self.early_release:
8798           # doesn't make sense to delay the release
8799           self.early_release = True
8800           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8801                           " early-release mode", secondary_node)
8802
8803       else:
8804         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8805                                      self.mode)
8806
8807       # If not specified all disks should be replaced
8808       if not self.disks:
8809         self.disks = range(len(self.instance.disks))
8810
8811     for node in check_nodes:
8812       _CheckNodeOnline(self.lu, node)
8813
8814     touched_nodes = frozenset([self.new_node, self.other_node,
8815                                self.target_node])
8816
8817     if self.lu.needed_locks[locking.LEVEL_NODE] == locking.ALL_SET:
8818       # Release unneeded node locks
8819       _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
8820
8821     # Check whether disks are valid
8822     for disk_idx in self.disks:
8823       instance.FindDisk(disk_idx)
8824
8825     # Get secondary node IP addresses
8826     self.node_secondary_ip = \
8827       dict((node_name, self.cfg.GetNodeInfo(node_name).secondary_ip)
8828            for node_name in touched_nodes
8829            if node_name is not None)
8830
8831   def Exec(self, feedback_fn):
8832     """Execute disk replacement.
8833
8834     This dispatches the disk replacement to the appropriate handler.
8835
8836     """
8837     if self.delay_iallocator:
8838       self._CheckPrereq2()
8839
8840     if (self.lu.needed_locks[locking.LEVEL_NODE] == locking.ALL_SET and
8841         __debug__):
8842       # Verify owned locks before starting operation
8843       owned_locks = self.lu.context.glm.list_owned(locking.LEVEL_NODE)
8844       assert set(owned_locks) == set(self.node_secondary_ip), \
8845           "Not owning the correct locks: %s" % (owned_locks, )
8846
8847     if not self.disks:
8848       feedback_fn("No disks need replacement")
8849       return
8850
8851     feedback_fn("Replacing disk(s) %s for %s" %
8852                 (utils.CommaJoin(self.disks), self.instance.name))
8853
8854     activate_disks = (not self.instance.admin_up)
8855
8856     # Activate the instance disks if we're replacing them on a down instance
8857     if activate_disks:
8858       _StartInstanceDisks(self.lu, self.instance, True)
8859
8860     try:
8861       # Should we replace the secondary node?
8862       if self.new_node is not None:
8863         fn = self._ExecDrbd8Secondary
8864       else:
8865         fn = self._ExecDrbd8DiskOnly
8866
8867       result = fn(feedback_fn)
8868     finally:
8869       # Deactivate the instance disks if we're replacing them on a
8870       # down instance
8871       if activate_disks:
8872         _SafeShutdownInstanceDisks(self.lu, self.instance)
8873
8874     if __debug__:
8875       # Verify owned locks
8876       owned_locks = self.lu.context.glm.list_owned(locking.LEVEL_NODE)
8877       nodes = frozenset(self.node_secondary_ip)
8878       assert ((self.early_release and not owned_locks) or
8879               (not self.early_release and not (set(owned_locks) - nodes))), \
8880         ("Not owning the correct locks, early_release=%s, owned=%r,"
8881          " nodes=%r" % (self.early_release, owned_locks, nodes))
8882
8883     return result
8884
8885   def _CheckVolumeGroup(self, nodes):
8886     self.lu.LogInfo("Checking volume groups")
8887
8888     vgname = self.cfg.GetVGName()
8889
8890     # Make sure volume group exists on all involved nodes
8891     results = self.rpc.call_vg_list(nodes)
8892     if not results:
8893       raise errors.OpExecError("Can't list volume groups on the nodes")
8894
8895     for node in nodes:
8896       res = results[node]
8897       res.Raise("Error checking node %s" % node)
8898       if vgname not in res.payload:
8899         raise errors.OpExecError("Volume group '%s' not found on node %s" %
8900                                  (vgname, node))
8901
8902   def _CheckDisksExistence(self, nodes):
8903     # Check disk existence
8904     for idx, dev in enumerate(self.instance.disks):
8905       if idx not in self.disks:
8906         continue
8907
8908       for node in nodes:
8909         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8910         self.cfg.SetDiskID(dev, node)
8911
8912         result = self.rpc.call_blockdev_find(node, dev)
8913
8914         msg = result.fail_msg
8915         if msg or not result.payload:
8916           if not msg:
8917             msg = "disk not found"
8918           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8919                                    (idx, node, msg))
8920
8921   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8922     for idx, dev in enumerate(self.instance.disks):
8923       if idx not in self.disks:
8924         continue
8925
8926       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8927                       (idx, node_name))
8928
8929       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8930                                    ldisk=ldisk):
8931         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8932                                  " replace disks for instance %s" %
8933                                  (node_name, self.instance.name))
8934
8935   def _CreateNewStorage(self, node_name):
8936     iv_names = {}
8937
8938     for idx, dev in enumerate(self.instance.disks):
8939       if idx not in self.disks:
8940         continue
8941
8942       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8943
8944       self.cfg.SetDiskID(dev, node_name)
8945
8946       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8947       names = _GenerateUniqueNames(self.lu, lv_names)
8948
8949       vg_data = dev.children[0].logical_id[0]
8950       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8951                              logical_id=(vg_data, names[0]))
8952       vg_meta = dev.children[1].logical_id[0]
8953       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8954                              logical_id=(vg_meta, names[1]))
8955
8956       new_lvs = [lv_data, lv_meta]
8957       old_lvs = dev.children
8958       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
8959
8960       # we pass force_create=True to force the LVM creation
8961       for new_lv in new_lvs:
8962         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8963                         _GetInstanceInfoText(self.instance), False)
8964
8965     return iv_names
8966
8967   def _CheckDevices(self, node_name, iv_names):
8968     for name, (dev, _, _) in iv_names.iteritems():
8969       self.cfg.SetDiskID(dev, node_name)
8970
8971       result = self.rpc.call_blockdev_find(node_name, dev)
8972
8973       msg = result.fail_msg
8974       if msg or not result.payload:
8975         if not msg:
8976           msg = "disk not found"
8977         raise errors.OpExecError("Can't find DRBD device %s: %s" %
8978                                  (name, msg))
8979
8980       if result.payload.is_degraded:
8981         raise errors.OpExecError("DRBD device %s is degraded!" % name)
8982
8983   def _RemoveOldStorage(self, node_name, iv_names):
8984     for name, (_, old_lvs, _) in iv_names.iteritems():
8985       self.lu.LogInfo("Remove logical volumes for %s" % name)
8986
8987       for lv in old_lvs:
8988         self.cfg.SetDiskID(lv, node_name)
8989
8990         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8991         if msg:
8992           self.lu.LogWarning("Can't remove old LV: %s" % msg,
8993                              hint="remove unused LVs manually")
8994
8995   def _ExecDrbd8DiskOnly(self, feedback_fn):
8996     """Replace a disk on the primary or secondary for DRBD 8.
8997
8998     The algorithm for replace is quite complicated:
8999
9000       1. for each disk to be replaced:
9001
9002         1. create new LVs on the target node with unique names
9003         1. detach old LVs from the drbd device
9004         1. rename old LVs to name_replaced.<time_t>
9005         1. rename new LVs to old LVs
9006         1. attach the new LVs (with the old names now) to the drbd device
9007
9008       1. wait for sync across all devices
9009
9010       1. for each modified disk:
9011
9012         1. remove old LVs (which have the name name_replaces.<time_t>)
9013
9014     Failures are not very well handled.
9015
9016     """
9017     steps_total = 6
9018
9019     # Step: check device activation
9020     self.lu.LogStep(1, steps_total, "Check device existence")
9021     self._CheckDisksExistence([self.other_node, self.target_node])
9022     self._CheckVolumeGroup([self.target_node, self.other_node])
9023
9024     # Step: check other node consistency
9025     self.lu.LogStep(2, steps_total, "Check peer consistency")
9026     self._CheckDisksConsistency(self.other_node,
9027                                 self.other_node == self.instance.primary_node,
9028                                 False)
9029
9030     # Step: create new storage
9031     self.lu.LogStep(3, steps_total, "Allocate new storage")
9032     iv_names = self._CreateNewStorage(self.target_node)
9033
9034     # Step: for each lv, detach+rename*2+attach
9035     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9036     for dev, old_lvs, new_lvs in iv_names.itervalues():
9037       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9038
9039       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9040                                                      old_lvs)
9041       result.Raise("Can't detach drbd from local storage on node"
9042                    " %s for device %s" % (self.target_node, dev.iv_name))
9043       #dev.children = []
9044       #cfg.Update(instance)
9045
9046       # ok, we created the new LVs, so now we know we have the needed
9047       # storage; as such, we proceed on the target node to rename
9048       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9049       # using the assumption that logical_id == physical_id (which in
9050       # turn is the unique_id on that node)
9051
9052       # FIXME(iustin): use a better name for the replaced LVs
9053       temp_suffix = int(time.time())
9054       ren_fn = lambda d, suff: (d.physical_id[0],
9055                                 d.physical_id[1] + "_replaced-%s" % suff)
9056
9057       # Build the rename list based on what LVs exist on the node
9058       rename_old_to_new = []
9059       for to_ren in old_lvs:
9060         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9061         if not result.fail_msg and result.payload:
9062           # device exists
9063           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9064
9065       self.lu.LogInfo("Renaming the old LVs on the target node")
9066       result = self.rpc.call_blockdev_rename(self.target_node,
9067                                              rename_old_to_new)
9068       result.Raise("Can't rename old LVs on node %s" % self.target_node)
9069
9070       # Now we rename the new LVs to the old LVs
9071       self.lu.LogInfo("Renaming the new LVs on the target node")
9072       rename_new_to_old = [(new, old.physical_id)
9073                            for old, new in zip(old_lvs, new_lvs)]
9074       result = self.rpc.call_blockdev_rename(self.target_node,
9075                                              rename_new_to_old)
9076       result.Raise("Can't rename new LVs on node %s" % self.target_node)
9077
9078       for old, new in zip(old_lvs, new_lvs):
9079         new.logical_id = old.logical_id
9080         self.cfg.SetDiskID(new, self.target_node)
9081
9082       for disk in old_lvs:
9083         disk.logical_id = ren_fn(disk, temp_suffix)
9084         self.cfg.SetDiskID(disk, self.target_node)
9085
9086       # Now that the new lvs have the old name, we can add them to the device
9087       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9088       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9089                                                   new_lvs)
9090       msg = result.fail_msg
9091       if msg:
9092         for new_lv in new_lvs:
9093           msg2 = self.rpc.call_blockdev_remove(self.target_node,
9094                                                new_lv).fail_msg
9095           if msg2:
9096             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9097                                hint=("cleanup manually the unused logical"
9098                                      "volumes"))
9099         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9100
9101       dev.children = new_lvs
9102
9103       self.cfg.Update(self.instance, feedback_fn)
9104
9105     cstep = 5
9106     if self.early_release:
9107       self.lu.LogStep(cstep, steps_total, "Removing old storage")
9108       cstep += 1
9109       self._RemoveOldStorage(self.target_node, iv_names)
9110       # WARNING: we release both node locks here, do not do other RPCs
9111       # than WaitForSync to the primary node
9112       _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9113                     names=[self.target_node, self.other_node])
9114
9115     # Wait for sync
9116     # This can fail as the old devices are degraded and _WaitForSync
9117     # does a combined result over all disks, so we don't check its return value
9118     self.lu.LogStep(cstep, steps_total, "Sync devices")
9119     cstep += 1
9120     _WaitForSync(self.lu, self.instance)
9121
9122     # Check all devices manually
9123     self._CheckDevices(self.instance.primary_node, iv_names)
9124
9125     # Step: remove old storage
9126     if not self.early_release:
9127       self.lu.LogStep(cstep, steps_total, "Removing old storage")
9128       cstep += 1
9129       self._RemoveOldStorage(self.target_node, iv_names)
9130
9131   def _ExecDrbd8Secondary(self, feedback_fn):
9132     """Replace the secondary node for DRBD 8.
9133
9134     The algorithm for replace is quite complicated:
9135       - for all disks of the instance:
9136         - create new LVs on the new node with same names
9137         - shutdown the drbd device on the old secondary
9138         - disconnect the drbd network on the primary
9139         - create the drbd device on the new secondary
9140         - network attach the drbd on the primary, using an artifice:
9141           the drbd code for Attach() will connect to the network if it
9142           finds a device which is connected to the good local disks but
9143           not network enabled
9144       - wait for sync across all devices
9145       - remove all disks from the old secondary
9146
9147     Failures are not very well handled.
9148
9149     """
9150     steps_total = 6
9151
9152     # Step: check device activation
9153     self.lu.LogStep(1, steps_total, "Check device existence")
9154     self._CheckDisksExistence([self.instance.primary_node])
9155     self._CheckVolumeGroup([self.instance.primary_node])
9156
9157     # Step: check other node consistency
9158     self.lu.LogStep(2, steps_total, "Check peer consistency")
9159     self._CheckDisksConsistency(self.instance.primary_node, True, True)
9160
9161     # Step: create new storage
9162     self.lu.LogStep(3, steps_total, "Allocate new storage")
9163     for idx, dev in enumerate(self.instance.disks):
9164       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9165                       (self.new_node, idx))
9166       # we pass force_create=True to force LVM creation
9167       for new_lv in dev.children:
9168         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9169                         _GetInstanceInfoText(self.instance), False)
9170
9171     # Step 4: dbrd minors and drbd setups changes
9172     # after this, we must manually remove the drbd minors on both the
9173     # error and the success paths
9174     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9175     minors = self.cfg.AllocateDRBDMinor([self.new_node
9176                                          for dev in self.instance.disks],
9177                                         self.instance.name)
9178     logging.debug("Allocated minors %r", minors)
9179
9180     iv_names = {}
9181     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9182       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9183                       (self.new_node, idx))
9184       # create new devices on new_node; note that we create two IDs:
9185       # one without port, so the drbd will be activated without
9186       # networking information on the new node at this stage, and one
9187       # with network, for the latter activation in step 4
9188       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9189       if self.instance.primary_node == o_node1:
9190         p_minor = o_minor1
9191       else:
9192         assert self.instance.primary_node == o_node2, "Three-node instance?"
9193         p_minor = o_minor2
9194
9195       new_alone_id = (self.instance.primary_node, self.new_node, None,
9196                       p_minor, new_minor, o_secret)
9197       new_net_id = (self.instance.primary_node, self.new_node, o_port,
9198                     p_minor, new_minor, o_secret)
9199
9200       iv_names[idx] = (dev, dev.children, new_net_id)
9201       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9202                     new_net_id)
9203       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9204                               logical_id=new_alone_id,
9205                               children=dev.children,
9206                               size=dev.size)
9207       try:
9208         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9209                               _GetInstanceInfoText(self.instance), False)
9210       except errors.GenericError:
9211         self.cfg.ReleaseDRBDMinors(self.instance.name)
9212         raise
9213
9214     # We have new devices, shutdown the drbd on the old secondary
9215     for idx, dev in enumerate(self.instance.disks):
9216       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9217       self.cfg.SetDiskID(dev, self.target_node)
9218       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9219       if msg:
9220         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9221                            "node: %s" % (idx, msg),
9222                            hint=("Please cleanup this device manually as"
9223                                  " soon as possible"))
9224
9225     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9226     result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
9227                                                self.node_secondary_ip,
9228                                                self.instance.disks)\
9229                                               [self.instance.primary_node]
9230
9231     msg = result.fail_msg
9232     if msg:
9233       # detaches didn't succeed (unlikely)
9234       self.cfg.ReleaseDRBDMinors(self.instance.name)
9235       raise errors.OpExecError("Can't detach the disks from the network on"
9236                                " old node: %s" % (msg,))
9237
9238     # if we managed to detach at least one, we update all the disks of
9239     # the instance to point to the new secondary
9240     self.lu.LogInfo("Updating instance configuration")
9241     for dev, _, new_logical_id in iv_names.itervalues():
9242       dev.logical_id = new_logical_id
9243       self.cfg.SetDiskID(dev, self.instance.primary_node)
9244
9245     self.cfg.Update(self.instance, feedback_fn)
9246
9247     # and now perform the drbd attach
9248     self.lu.LogInfo("Attaching primary drbds to new secondary"
9249                     " (standalone => connected)")
9250     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9251                                             self.new_node],
9252                                            self.node_secondary_ip,
9253                                            self.instance.disks,
9254                                            self.instance.name,
9255                                            False)
9256     for to_node, to_result in result.items():
9257       msg = to_result.fail_msg
9258       if msg:
9259         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9260                            to_node, msg,
9261                            hint=("please do a gnt-instance info to see the"
9262                                  " status of disks"))
9263     cstep = 5
9264     if self.early_release:
9265       self.lu.LogStep(cstep, steps_total, "Removing old storage")
9266       cstep += 1
9267       self._RemoveOldStorage(self.target_node, iv_names)
9268       # WARNING: we release all node locks here, do not do other RPCs
9269       # than WaitForSync to the primary node
9270       _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9271                     names=[self.instance.primary_node,
9272                            self.target_node,
9273                            self.new_node])
9274
9275     # Wait for sync
9276     # This can fail as the old devices are degraded and _WaitForSync
9277     # does a combined result over all disks, so we don't check its return value
9278     self.lu.LogStep(cstep, steps_total, "Sync devices")
9279     cstep += 1
9280     _WaitForSync(self.lu, self.instance)
9281
9282     # Check all devices manually
9283     self._CheckDevices(self.instance.primary_node, iv_names)
9284
9285     # Step: remove old storage
9286     if not self.early_release:
9287       self.lu.LogStep(cstep, steps_total, "Removing old storage")
9288       self._RemoveOldStorage(self.target_node, iv_names)
9289
9290
9291 class LURepairNodeStorage(NoHooksLU):
9292   """Repairs the volume group on a node.
9293
9294   """
9295   REQ_BGL = False
9296
9297   def CheckArguments(self):
9298     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9299
9300     storage_type = self.op.storage_type
9301
9302     if (constants.SO_FIX_CONSISTENCY not in
9303         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
9304       raise errors.OpPrereqError("Storage units of type '%s' can not be"
9305                                  " repaired" % storage_type,
9306                                  errors.ECODE_INVAL)
9307
9308   def ExpandNames(self):
9309     self.needed_locks = {
9310       locking.LEVEL_NODE: [self.op.node_name],
9311       }
9312
9313   def _CheckFaultyDisks(self, instance, node_name):
9314     """Ensure faulty disks abort the opcode or at least warn."""
9315     try:
9316       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
9317                                   node_name, True):
9318         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
9319                                    " node '%s'" % (instance.name, node_name),
9320                                    errors.ECODE_STATE)
9321     except errors.OpPrereqError, err:
9322       if self.op.ignore_consistency:
9323         self.proc.LogWarning(str(err.args[0]))
9324       else:
9325         raise
9326
9327   def CheckPrereq(self):
9328     """Check prerequisites.
9329
9330     """
9331     # Check whether any instance on this node has faulty disks
9332     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
9333       if not inst.admin_up:
9334         continue
9335       check_nodes = set(inst.all_nodes)
9336       check_nodes.discard(self.op.node_name)
9337       for inst_node_name in check_nodes:
9338         self._CheckFaultyDisks(inst, inst_node_name)
9339
9340   def Exec(self, feedback_fn):
9341     feedback_fn("Repairing storage unit '%s' on %s ..." %
9342                 (self.op.name, self.op.node_name))
9343
9344     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
9345     result = self.rpc.call_storage_execute(self.op.node_name,
9346                                            self.op.storage_type, st_args,
9347                                            self.op.name,
9348                                            constants.SO_FIX_CONSISTENCY)
9349     result.Raise("Failed to repair storage unit '%s' on %s" %
9350                  (self.op.name, self.op.node_name))
9351
9352
9353 class LUNodeEvacStrategy(NoHooksLU):
9354   """Computes the node evacuation strategy.
9355
9356   """
9357   REQ_BGL = False
9358
9359   def CheckArguments(self):
9360     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
9361
9362   def ExpandNames(self):
9363     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
9364     self.needed_locks = locks = {}
9365     if self.op.remote_node is None:
9366       locks[locking.LEVEL_NODE] = locking.ALL_SET
9367     else:
9368       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9369       locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]
9370
9371   def Exec(self, feedback_fn):
9372     if self.op.remote_node is not None:
9373       instances = []
9374       for node in self.op.nodes:
9375         instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
9376       result = []
9377       for i in instances:
9378         if i.primary_node == self.op.remote_node:
9379           raise errors.OpPrereqError("Node %s is the primary node of"
9380                                      " instance %s, cannot use it as"
9381                                      " secondary" %
9382                                      (self.op.remote_node, i.name),
9383                                      errors.ECODE_INVAL)
9384         result.append([i.name, self.op.remote_node])
9385     else:
9386       ial = IAllocator(self.cfg, self.rpc,
9387                        mode=constants.IALLOCATOR_MODE_MEVAC,
9388                        evac_nodes=self.op.nodes)
9389       ial.Run(self.op.iallocator, validate=True)
9390       if not ial.success:
9391         raise errors.OpExecError("No valid evacuation solution: %s" % ial.info,
9392                                  errors.ECODE_NORES)
9393       result = ial.result
9394     return result
9395
9396
9397 class LUInstanceGrowDisk(LogicalUnit):
9398   """Grow a disk of an instance.
9399
9400   """
9401   HPATH = "disk-grow"
9402   HTYPE = constants.HTYPE_INSTANCE
9403   REQ_BGL = False
9404
9405   def ExpandNames(self):
9406     self._ExpandAndLockInstance()
9407     self.needed_locks[locking.LEVEL_NODE] = []
9408     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9409
9410   def DeclareLocks(self, level):
9411     if level == locking.LEVEL_NODE:
9412       self._LockInstancesNodes()
9413
9414   def BuildHooksEnv(self):
9415     """Build hooks env.
9416
9417     This runs on the master, the primary and all the secondaries.
9418
9419     """
9420     env = {
9421       "DISK": self.op.disk,
9422       "AMOUNT": self.op.amount,
9423       }
9424     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
9425     return env
9426
9427   def BuildHooksNodes(self):
9428     """Build hooks nodes.
9429
9430     """
9431     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9432     return (nl, nl)
9433
9434   def CheckPrereq(self):
9435     """Check prerequisites.
9436
9437     This checks that the instance is in the cluster.
9438
9439     """
9440     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9441     assert instance is not None, \
9442       "Cannot retrieve locked instance %s" % self.op.instance_name
9443     nodenames = list(instance.all_nodes)
9444     for node in nodenames:
9445       _CheckNodeOnline(self, node)
9446
9447     self.instance = instance
9448
9449     if instance.disk_template not in constants.DTS_GROWABLE:
9450       raise errors.OpPrereqError("Instance's disk layout does not support"
9451                                  " growing.", errors.ECODE_INVAL)
9452
9453     self.disk = instance.FindDisk(self.op.disk)
9454
9455     if instance.disk_template not in (constants.DT_FILE,
9456                                       constants.DT_SHARED_FILE):
9457       # TODO: check the free disk space for file, when that feature will be
9458       # supported
9459       _CheckNodesFreeDiskPerVG(self, nodenames,
9460                                self.disk.ComputeGrowth(self.op.amount))
9461
9462   def Exec(self, feedback_fn):
9463     """Execute disk grow.
9464
9465     """
9466     instance = self.instance
9467     disk = self.disk
9468
9469     disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
9470     if not disks_ok:
9471       raise errors.OpExecError("Cannot activate block device to grow")
9472
9473     for node in instance.all_nodes:
9474       self.cfg.SetDiskID(disk, node)
9475       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount)
9476       result.Raise("Grow request failed to node %s" % node)
9477
9478       # TODO: Rewrite code to work properly
9479       # DRBD goes into sync mode for a short amount of time after executing the
9480       # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
9481       # calling "resize" in sync mode fails. Sleeping for a short amount of
9482       # time is a work-around.
9483       time.sleep(5)
9484
9485     disk.RecordGrow(self.op.amount)
9486     self.cfg.Update(instance, feedback_fn)
9487     if self.op.wait_for_sync:
9488       disk_abort = not _WaitForSync(self, instance, disks=[disk])
9489       if disk_abort:
9490         self.proc.LogWarning("Warning: disk sync-ing has not returned a good"
9491                              " status.\nPlease check the instance.")
9492       if not instance.admin_up:
9493         _SafeShutdownInstanceDisks(self, instance, disks=[disk])
9494     elif not instance.admin_up:
9495       self.proc.LogWarning("Not shutting down the disk even if the instance is"
9496                            " not supposed to be running because no wait for"
9497                            " sync mode was requested.")
9498
9499
9500 class LUInstanceQueryData(NoHooksLU):
9501   """Query runtime instance data.
9502
9503   """
9504   REQ_BGL = False
9505
9506   def ExpandNames(self):
9507     self.needed_locks = {}
9508
9509     # Use locking if requested or when non-static information is wanted
9510     if not (self.op.static or self.op.use_locking):
9511       self.LogWarning("Non-static data requested, locks need to be acquired")
9512       self.op.use_locking = True
9513
9514     if self.op.instances or not self.op.use_locking:
9515       # Expand instance names right here
9516       self.wanted_names = _GetWantedInstances(self, self.op.instances)
9517     else:
9518       # Will use acquired locks
9519       self.wanted_names = None
9520
9521     if self.op.use_locking:
9522       self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9523
9524       if self.wanted_names is None:
9525         self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
9526       else:
9527         self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
9528
9529       self.needed_locks[locking.LEVEL_NODE] = []
9530       self.share_locks = dict.fromkeys(locking.LEVELS, 1)
9531       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9532
9533   def DeclareLocks(self, level):
9534     if self.op.use_locking and level == locking.LEVEL_NODE:
9535       self._LockInstancesNodes()
9536
9537   def CheckPrereq(self):
9538     """Check prerequisites.
9539
9540     This only checks the optional instance list against the existing names.
9541
9542     """
9543     if self.wanted_names is None:
9544       assert self.op.use_locking, "Locking was not used"
9545       self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]
9546
9547     self.wanted_instances = [self.cfg.GetInstanceInfo(name)
9548                              for name in self.wanted_names]
9549
9550   def _ComputeBlockdevStatus(self, node, instance_name, dev):
9551     """Returns the status of a block device
9552
9553     """
9554     if self.op.static or not node:
9555       return None
9556
9557     self.cfg.SetDiskID(dev, node)
9558
9559     result = self.rpc.call_blockdev_find(node, dev)
9560     if result.offline:
9561       return None
9562
9563     result.Raise("Can't compute disk status for %s" % instance_name)
9564
9565     status = result.payload
9566     if status is None:
9567       return None
9568
9569     return (status.dev_path, status.major, status.minor,
9570             status.sync_percent, status.estimated_time,
9571             status.is_degraded, status.ldisk_status)
9572
9573   def _ComputeDiskStatus(self, instance, snode, dev):
9574     """Compute block device status.
9575
9576     """
9577     if dev.dev_type in constants.LDS_DRBD:
9578       # we change the snode then (otherwise we use the one passed in)
9579       if dev.logical_id[0] == instance.primary_node:
9580         snode = dev.logical_id[1]
9581       else:
9582         snode = dev.logical_id[0]
9583
9584     dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
9585                                               instance.name, dev)
9586     dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
9587
9588     if dev.children:
9589       dev_children = [self._ComputeDiskStatus(instance, snode, child)
9590                       for child in dev.children]
9591     else:
9592       dev_children = []
9593
9594     return {
9595       "iv_name": dev.iv_name,
9596       "dev_type": dev.dev_type,
9597       "logical_id": dev.logical_id,
9598       "physical_id": dev.physical_id,
9599       "pstatus": dev_pstatus,
9600       "sstatus": dev_sstatus,
9601       "children": dev_children,
9602       "mode": dev.mode,
9603       "size": dev.size,
9604       }
9605
9606   def Exec(self, feedback_fn):
9607     """Gather and return data"""
9608     result = {}
9609
9610     cluster = self.cfg.GetClusterInfo()
9611
9612     for instance in self.wanted_instances:
9613       if not self.op.static:
9614         remote_info = self.rpc.call_instance_info(instance.primary_node,
9615                                                   instance.name,
9616                                                   instance.hypervisor)
9617         remote_info.Raise("Error checking node %s" % instance.primary_node)
9618         remote_info = remote_info.payload
9619         if remote_info and "state" in remote_info:
9620           remote_state = "up"
9621         else:
9622           remote_state = "down"
9623       else:
9624         remote_state = None
9625       if instance.admin_up:
9626         config_state = "up"
9627       else:
9628         config_state = "down"
9629
9630       disks = [self._ComputeDiskStatus(instance, None, device)
9631                for device in instance.disks]
9632
9633       result[instance.name] = {
9634         "name": instance.name,
9635         "config_state": config_state,
9636         "run_state": remote_state,
9637         "pnode": instance.primary_node,
9638         "snodes": instance.secondary_nodes,
9639         "os": instance.os,
9640         # this happens to be the same format used for hooks
9641         "nics": _NICListToTuple(self, instance.nics),
9642         "disk_template": instance.disk_template,
9643         "disks": disks,
9644         "hypervisor": instance.hypervisor,
9645         "network_port": instance.network_port,
9646         "hv_instance": instance.hvparams,
9647         "hv_actual": cluster.FillHV(instance, skip_globals=True),
9648         "be_instance": instance.beparams,
9649         "be_actual": cluster.FillBE(instance),
9650         "os_instance": instance.osparams,
9651         "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
9652         "serial_no": instance.serial_no,
9653         "mtime": instance.mtime,
9654         "ctime": instance.ctime,
9655         "uuid": instance.uuid,
9656         }
9657
9658     return result
9659
9660
9661 class LUInstanceSetParams(LogicalUnit):
9662   """Modifies an instances's parameters.
9663
9664   """
9665   HPATH = "instance-modify"
9666   HTYPE = constants.HTYPE_INSTANCE
9667   REQ_BGL = False
9668
9669   def CheckArguments(self):
9670     if not (self.op.nics or self.op.disks or self.op.disk_template or
9671             self.op.hvparams or self.op.beparams or self.op.os_name):
9672       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
9673
9674     if self.op.hvparams:
9675       _CheckGlobalHvParams(self.op.hvparams)
9676
9677     # Disk validation
9678     disk_addremove = 0
9679     for disk_op, disk_dict in self.op.disks:
9680       utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
9681       if disk_op == constants.DDM_REMOVE:
9682         disk_addremove += 1
9683         continue
9684       elif disk_op == constants.DDM_ADD:
9685         disk_addremove += 1
9686       else:
9687         if not isinstance(disk_op, int):
9688           raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
9689         if not isinstance(disk_dict, dict):
9690           msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
9691           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9692
9693       if disk_op == constants.DDM_ADD:
9694         mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
9695         if mode not in constants.DISK_ACCESS_SET:
9696           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9697                                      errors.ECODE_INVAL)
9698         size = disk_dict.get(constants.IDISK_SIZE, None)
9699         if size is None:
9700           raise errors.OpPrereqError("Required disk parameter size missing",
9701                                      errors.ECODE_INVAL)
9702         try:
9703           size = int(size)
9704         except (TypeError, ValueError), err:
9705           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
9706                                      str(err), errors.ECODE_INVAL)
9707         disk_dict[constants.IDISK_SIZE] = size
9708       else:
9709         # modification of disk
9710         if constants.IDISK_SIZE in disk_dict:
9711           raise errors.OpPrereqError("Disk size change not possible, use"
9712                                      " grow-disk", errors.ECODE_INVAL)
9713
9714     if disk_addremove > 1:
9715       raise errors.OpPrereqError("Only one disk add or remove operation"
9716                                  " supported at a time", errors.ECODE_INVAL)
9717
9718     if self.op.disks and self.op.disk_template is not None:
9719       raise errors.OpPrereqError("Disk template conversion and other disk"
9720                                  " changes not supported at the same time",
9721                                  errors.ECODE_INVAL)
9722
9723     if (self.op.disk_template and
9724         self.op.disk_template in constants.DTS_INT_MIRROR and
9725         self.op.remote_node is None):
9726       raise errors.OpPrereqError("Changing the disk template to a mirrored"
9727                                  " one requires specifying a secondary node",
9728                                  errors.ECODE_INVAL)
9729
9730     # NIC validation
9731     nic_addremove = 0
9732     for nic_op, nic_dict in self.op.nics:
9733       utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9734       if nic_op == constants.DDM_REMOVE:
9735         nic_addremove += 1
9736         continue
9737       elif nic_op == constants.DDM_ADD:
9738         nic_addremove += 1
9739       else:
9740         if not isinstance(nic_op, int):
9741           raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9742         if not isinstance(nic_dict, dict):
9743           msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9744           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9745
9746       # nic_dict should be a dict
9747       nic_ip = nic_dict.get(constants.INIC_IP, None)
9748       if nic_ip is not None:
9749         if nic_ip.lower() == constants.VALUE_NONE:
9750           nic_dict[constants.INIC_IP] = None
9751         else:
9752           if not netutils.IPAddress.IsValid(nic_ip):
9753             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9754                                        errors.ECODE_INVAL)
9755
9756       nic_bridge = nic_dict.get('bridge', None)
9757       nic_link = nic_dict.get(constants.INIC_LINK, None)
9758       if nic_bridge and nic_link:
9759         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9760                                    " at the same time", errors.ECODE_INVAL)
9761       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9762         nic_dict['bridge'] = None
9763       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9764         nic_dict[constants.INIC_LINK] = None
9765
9766       if nic_op == constants.DDM_ADD:
9767         nic_mac = nic_dict.get(constants.INIC_MAC, None)
9768         if nic_mac is None:
9769           nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
9770
9771       if constants.INIC_MAC in nic_dict:
9772         nic_mac = nic_dict[constants.INIC_MAC]
9773         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9774           nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9775
9776         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9777           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9778                                      " modifying an existing nic",
9779                                      errors.ECODE_INVAL)
9780
9781     if nic_addremove > 1:
9782       raise errors.OpPrereqError("Only one NIC add or remove operation"
9783                                  " supported at a time", errors.ECODE_INVAL)
9784
9785   def ExpandNames(self):
9786     self._ExpandAndLockInstance()
9787     self.needed_locks[locking.LEVEL_NODE] = []
9788     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9789
9790   def DeclareLocks(self, level):
9791     if level == locking.LEVEL_NODE:
9792       self._LockInstancesNodes()
9793       if self.op.disk_template and self.op.remote_node:
9794         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9795         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9796
9797   def BuildHooksEnv(self):
9798     """Build hooks env.
9799
9800     This runs on the master, primary and secondaries.
9801
9802     """
9803     args = dict()
9804     if constants.BE_MEMORY in self.be_new:
9805       args['memory'] = self.be_new[constants.BE_MEMORY]
9806     if constants.BE_VCPUS in self.be_new:
9807       args['vcpus'] = self.be_new[constants.BE_VCPUS]
9808     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9809     # information at all.
9810     if self.op.nics:
9811       args['nics'] = []
9812       nic_override = dict(self.op.nics)
9813       for idx, nic in enumerate(self.instance.nics):
9814         if idx in nic_override:
9815           this_nic_override = nic_override[idx]
9816         else:
9817           this_nic_override = {}
9818         if constants.INIC_IP in this_nic_override:
9819           ip = this_nic_override[constants.INIC_IP]
9820         else:
9821           ip = nic.ip
9822         if constants.INIC_MAC in this_nic_override:
9823           mac = this_nic_override[constants.INIC_MAC]
9824         else:
9825           mac = nic.mac
9826         if idx in self.nic_pnew:
9827           nicparams = self.nic_pnew[idx]
9828         else:
9829           nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9830         mode = nicparams[constants.NIC_MODE]
9831         link = nicparams[constants.NIC_LINK]
9832         args['nics'].append((ip, mac, mode, link))
9833       if constants.DDM_ADD in nic_override:
9834         ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
9835         mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
9836         nicparams = self.nic_pnew[constants.DDM_ADD]
9837         mode = nicparams[constants.NIC_MODE]
9838         link = nicparams[constants.NIC_LINK]
9839         args['nics'].append((ip, mac, mode, link))
9840       elif constants.DDM_REMOVE in nic_override:
9841         del args['nics'][-1]
9842
9843     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9844     if self.op.disk_template:
9845       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9846
9847     return env
9848
9849   def BuildHooksNodes(self):
9850     """Build hooks nodes.
9851
9852     """
9853     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9854     return (nl, nl)
9855
9856   def CheckPrereq(self):
9857     """Check prerequisites.
9858
9859     This only checks the instance list against the existing names.
9860
9861     """
9862     # checking the new params on the primary/secondary nodes
9863
9864     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9865     cluster = self.cluster = self.cfg.GetClusterInfo()
9866     assert self.instance is not None, \
9867       "Cannot retrieve locked instance %s" % self.op.instance_name
9868     pnode = instance.primary_node
9869     nodelist = list(instance.all_nodes)
9870
9871     # OS change
9872     if self.op.os_name and not self.op.force:
9873       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9874                       self.op.force_variant)
9875       instance_os = self.op.os_name
9876     else:
9877       instance_os = instance.os
9878
9879     if self.op.disk_template:
9880       if instance.disk_template == self.op.disk_template:
9881         raise errors.OpPrereqError("Instance already has disk template %s" %
9882                                    instance.disk_template, errors.ECODE_INVAL)
9883
9884       if (instance.disk_template,
9885           self.op.disk_template) not in self._DISK_CONVERSIONS:
9886         raise errors.OpPrereqError("Unsupported disk template conversion from"
9887                                    " %s to %s" % (instance.disk_template,
9888                                                   self.op.disk_template),
9889                                    errors.ECODE_INVAL)
9890       _CheckInstanceDown(self, instance, "cannot change disk template")
9891       if self.op.disk_template in constants.DTS_INT_MIRROR:
9892         if self.op.remote_node == pnode:
9893           raise errors.OpPrereqError("Given new secondary node %s is the same"
9894                                      " as the primary node of the instance" %
9895                                      self.op.remote_node, errors.ECODE_STATE)
9896         _CheckNodeOnline(self, self.op.remote_node)
9897         _CheckNodeNotDrained(self, self.op.remote_node)
9898         # FIXME: here we assume that the old instance type is DT_PLAIN
9899         assert instance.disk_template == constants.DT_PLAIN
9900         disks = [{constants.IDISK_SIZE: d.size,
9901                   constants.IDISK_VG: d.logical_id[0]}
9902                  for d in instance.disks]
9903         required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9904         _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9905
9906     # hvparams processing
9907     if self.op.hvparams:
9908       hv_type = instance.hypervisor
9909       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9910       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9911       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9912
9913       # local check
9914       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9915       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9916       self.hv_new = hv_new # the new actual values
9917       self.hv_inst = i_hvdict # the new dict (without defaults)
9918     else:
9919       self.hv_new = self.hv_inst = {}
9920
9921     # beparams processing
9922     if self.op.beparams:
9923       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9924                                    use_none=True)
9925       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9926       be_new = cluster.SimpleFillBE(i_bedict)
9927       self.be_new = be_new # the new actual values
9928       self.be_inst = i_bedict # the new dict (without defaults)
9929     else:
9930       self.be_new = self.be_inst = {}
9931
9932     # osparams processing
9933     if self.op.osparams:
9934       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9935       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9936       self.os_inst = i_osdict # the new dict (without defaults)
9937     else:
9938       self.os_inst = {}
9939
9940     self.warn = []
9941
9942     if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9943       mem_check_list = [pnode]
9944       if be_new[constants.BE_AUTO_BALANCE]:
9945         # either we changed auto_balance to yes or it was from before
9946         mem_check_list.extend(instance.secondary_nodes)
9947       instance_info = self.rpc.call_instance_info(pnode, instance.name,
9948                                                   instance.hypervisor)
9949       nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9950                                          instance.hypervisor)
9951       pninfo = nodeinfo[pnode]
9952       msg = pninfo.fail_msg
9953       if msg:
9954         # Assume the primary node is unreachable and go ahead
9955         self.warn.append("Can't get info from primary node %s: %s" %
9956                          (pnode,  msg))
9957       elif not isinstance(pninfo.payload.get('memory_free', None), int):
9958         self.warn.append("Node data from primary node %s doesn't contain"
9959                          " free memory information" % pnode)
9960       elif instance_info.fail_msg:
9961         self.warn.append("Can't get instance runtime information: %s" %
9962                         instance_info.fail_msg)
9963       else:
9964         if instance_info.payload:
9965           current_mem = int(instance_info.payload['memory'])
9966         else:
9967           # Assume instance not running
9968           # (there is a slight race condition here, but it's not very probable,
9969           # and we have no other way to check)
9970           current_mem = 0
9971         miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9972                     pninfo.payload['memory_free'])
9973         if miss_mem > 0:
9974           raise errors.OpPrereqError("This change will prevent the instance"
9975                                      " from starting, due to %d MB of memory"
9976                                      " missing on its primary node" % miss_mem,
9977                                      errors.ECODE_NORES)
9978
9979       if be_new[constants.BE_AUTO_BALANCE]:
9980         for node, nres in nodeinfo.items():
9981           if node not in instance.secondary_nodes:
9982             continue
9983           msg = nres.fail_msg
9984           if msg:
9985             self.warn.append("Can't get info from secondary node %s: %s" %
9986                              (node, msg))
9987           elif not isinstance(nres.payload.get('memory_free', None), int):
9988             self.warn.append("Secondary node %s didn't return free"
9989                              " memory information" % node)
9990           elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9991             self.warn.append("Not enough memory to failover instance to"
9992                              " secondary node %s" % node)
9993
9994     # NIC processing
9995     self.nic_pnew = {}
9996     self.nic_pinst = {}
9997     for nic_op, nic_dict in self.op.nics:
9998       if nic_op == constants.DDM_REMOVE:
9999         if not instance.nics:
10000           raise errors.OpPrereqError("Instance has no NICs, cannot remove",
10001                                      errors.ECODE_INVAL)
10002         continue
10003       if nic_op != constants.DDM_ADD:
10004         # an existing nic
10005         if not instance.nics:
10006           raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10007                                      " no NICs" % nic_op,
10008                                      errors.ECODE_INVAL)
10009         if nic_op < 0 or nic_op >= len(instance.nics):
10010           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
10011                                      " are 0 to %d" %
10012                                      (nic_op, len(instance.nics) - 1),
10013                                      errors.ECODE_INVAL)
10014         old_nic_params = instance.nics[nic_op].nicparams
10015         old_nic_ip = instance.nics[nic_op].ip
10016       else:
10017         old_nic_params = {}
10018         old_nic_ip = None
10019
10020       update_params_dict = dict([(key, nic_dict[key])
10021                                  for key in constants.NICS_PARAMETERS
10022                                  if key in nic_dict])
10023
10024       if 'bridge' in nic_dict:
10025         update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
10026
10027       new_nic_params = _GetUpdatedParams(old_nic_params,
10028                                          update_params_dict)
10029       utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
10030       new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
10031       objects.NIC.CheckParameterSyntax(new_filled_nic_params)
10032       self.nic_pinst[nic_op] = new_nic_params
10033       self.nic_pnew[nic_op] = new_filled_nic_params
10034       new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
10035
10036       if new_nic_mode == constants.NIC_MODE_BRIDGED:
10037         nic_bridge = new_filled_nic_params[constants.NIC_LINK]
10038         msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
10039         if msg:
10040           msg = "Error checking bridges on node %s: %s" % (pnode, msg)
10041           if self.op.force:
10042             self.warn.append(msg)
10043           else:
10044             raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
10045       if new_nic_mode == constants.NIC_MODE_ROUTED:
10046         if constants.INIC_IP in nic_dict:
10047           nic_ip = nic_dict[constants.INIC_IP]
10048         else:
10049           nic_ip = old_nic_ip
10050         if nic_ip is None:
10051           raise errors.OpPrereqError('Cannot set the nic ip to None'
10052                                      ' on a routed nic', errors.ECODE_INVAL)
10053       if constants.INIC_MAC in nic_dict:
10054         nic_mac = nic_dict[constants.INIC_MAC]
10055         if nic_mac is None:
10056           raise errors.OpPrereqError('Cannot set the nic mac to None',
10057                                      errors.ECODE_INVAL)
10058         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10059           # otherwise generate the mac
10060           nic_dict[constants.INIC_MAC] = \
10061             self.cfg.GenerateMAC(self.proc.GetECId())
10062         else:
10063           # or validate/reserve the current one
10064           try:
10065             self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
10066           except errors.ReservationError:
10067             raise errors.OpPrereqError("MAC address %s already in use"
10068                                        " in cluster" % nic_mac,
10069                                        errors.ECODE_NOTUNIQUE)
10070
10071     # DISK processing
10072     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
10073       raise errors.OpPrereqError("Disk operations not supported for"
10074                                  " diskless instances",
10075                                  errors.ECODE_INVAL)
10076     for disk_op, _ in self.op.disks:
10077       if disk_op == constants.DDM_REMOVE:
10078         if len(instance.disks) == 1:
10079           raise errors.OpPrereqError("Cannot remove the last disk of"
10080                                      " an instance", errors.ECODE_INVAL)
10081         _CheckInstanceDown(self, instance, "cannot remove disks")
10082
10083       if (disk_op == constants.DDM_ADD and
10084           len(instance.disks) >= constants.MAX_DISKS):
10085         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
10086                                    " add more" % constants.MAX_DISKS,
10087                                    errors.ECODE_STATE)
10088       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
10089         # an existing disk
10090         if disk_op < 0 or disk_op >= len(instance.disks):
10091           raise errors.OpPrereqError("Invalid disk index %s, valid values"
10092                                      " are 0 to %d" %
10093                                      (disk_op, len(instance.disks)),
10094                                      errors.ECODE_INVAL)
10095
10096     return
10097
10098   def _ConvertPlainToDrbd(self, feedback_fn):
10099     """Converts an instance from plain to drbd.
10100
10101     """
10102     feedback_fn("Converting template to drbd")
10103     instance = self.instance
10104     pnode = instance.primary_node
10105     snode = self.op.remote_node
10106
10107     # create a fake disk info for _GenerateDiskTemplate
10108     disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
10109                   constants.IDISK_VG: d.logical_id[0]}
10110                  for d in instance.disks]
10111     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
10112                                       instance.name, pnode, [snode],
10113                                       disk_info, None, None, 0, feedback_fn)
10114     info = _GetInstanceInfoText(instance)
10115     feedback_fn("Creating aditional volumes...")
10116     # first, create the missing data and meta devices
10117     for disk in new_disks:
10118       # unfortunately this is... not too nice
10119       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
10120                             info, True)
10121       for child in disk.children:
10122         _CreateSingleBlockDev(self, snode, instance, child, info, True)
10123     # at this stage, all new LVs have been created, we can rename the
10124     # old ones
10125     feedback_fn("Renaming original volumes...")
10126     rename_list = [(o, n.children[0].logical_id)
10127                    for (o, n) in zip(instance.disks, new_disks)]
10128     result = self.rpc.call_blockdev_rename(pnode, rename_list)
10129     result.Raise("Failed to rename original LVs")
10130
10131     feedback_fn("Initializing DRBD devices...")
10132     # all child devices are in place, we can now create the DRBD devices
10133     for disk in new_disks:
10134       for node in [pnode, snode]:
10135         f_create = node == pnode
10136         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
10137
10138     # at this point, the instance has been modified
10139     instance.disk_template = constants.DT_DRBD8
10140     instance.disks = new_disks
10141     self.cfg.Update(instance, feedback_fn)
10142
10143     # disks are created, waiting for sync
10144     disk_abort = not _WaitForSync(self, instance)
10145     if disk_abort:
10146       raise errors.OpExecError("There are some degraded disks for"
10147                                " this instance, please cleanup manually")
10148
10149   def _ConvertDrbdToPlain(self, feedback_fn):
10150     """Converts an instance from drbd to plain.
10151
10152     """
10153     instance = self.instance
10154     assert len(instance.secondary_nodes) == 1
10155     pnode = instance.primary_node
10156     snode = instance.secondary_nodes[0]
10157     feedback_fn("Converting template to plain")
10158
10159     old_disks = instance.disks
10160     new_disks = [d.children[0] for d in old_disks]
10161
10162     # copy over size and mode
10163     for parent, child in zip(old_disks, new_disks):
10164       child.size = parent.size
10165       child.mode = parent.mode
10166
10167     # update instance structure
10168     instance.disks = new_disks
10169     instance.disk_template = constants.DT_PLAIN
10170     self.cfg.Update(instance, feedback_fn)
10171
10172     feedback_fn("Removing volumes on the secondary node...")
10173     for disk in old_disks:
10174       self.cfg.SetDiskID(disk, snode)
10175       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
10176       if msg:
10177         self.LogWarning("Could not remove block device %s on node %s,"
10178                         " continuing anyway: %s", disk.iv_name, snode, msg)
10179
10180     feedback_fn("Removing unneeded volumes on the primary node...")
10181     for idx, disk in enumerate(old_disks):
10182       meta = disk.children[1]
10183       self.cfg.SetDiskID(meta, pnode)
10184       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
10185       if msg:
10186         self.LogWarning("Could not remove metadata for disk %d on node %s,"
10187                         " continuing anyway: %s", idx, pnode, msg)
10188
10189   def Exec(self, feedback_fn):
10190     """Modifies an instance.
10191
10192     All parameters take effect only at the next restart of the instance.
10193
10194     """
10195     # Process here the warnings from CheckPrereq, as we don't have a
10196     # feedback_fn there.
10197     for warn in self.warn:
10198       feedback_fn("WARNING: %s" % warn)
10199
10200     result = []
10201     instance = self.instance
10202     # disk changes
10203     for disk_op, disk_dict in self.op.disks:
10204       if disk_op == constants.DDM_REMOVE:
10205         # remove the last disk
10206         device = instance.disks.pop()
10207         device_idx = len(instance.disks)
10208         for node, disk in device.ComputeNodeTree(instance.primary_node):
10209           self.cfg.SetDiskID(disk, node)
10210           msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
10211           if msg:
10212             self.LogWarning("Could not remove disk/%d on node %s: %s,"
10213                             " continuing anyway", device_idx, node, msg)
10214         result.append(("disk/%d" % device_idx, "remove"))
10215       elif disk_op == constants.DDM_ADD:
10216         # add a new disk
10217         if instance.disk_template in (constants.DT_FILE,
10218                                         constants.DT_SHARED_FILE):
10219           file_driver, file_path = instance.disks[0].logical_id
10220           file_path = os.path.dirname(file_path)
10221         else:
10222           file_driver = file_path = None
10223         disk_idx_base = len(instance.disks)
10224         new_disk = _GenerateDiskTemplate(self,
10225                                          instance.disk_template,
10226                                          instance.name, instance.primary_node,
10227                                          instance.secondary_nodes,
10228                                          [disk_dict],
10229                                          file_path,
10230                                          file_driver,
10231                                          disk_idx_base, feedback_fn)[0]
10232         instance.disks.append(new_disk)
10233         info = _GetInstanceInfoText(instance)
10234
10235         logging.info("Creating volume %s for instance %s",
10236                      new_disk.iv_name, instance.name)
10237         # Note: this needs to be kept in sync with _CreateDisks
10238         #HARDCODE
10239         for node in instance.all_nodes:
10240           f_create = node == instance.primary_node
10241           try:
10242             _CreateBlockDev(self, node, instance, new_disk,
10243                             f_create, info, f_create)
10244           except errors.OpExecError, err:
10245             self.LogWarning("Failed to create volume %s (%s) on"
10246                             " node %s: %s",
10247                             new_disk.iv_name, new_disk, node, err)
10248         result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
10249                        (new_disk.size, new_disk.mode)))
10250       else:
10251         # change a given disk
10252         instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
10253         result.append(("disk.mode/%d" % disk_op,
10254                        disk_dict[constants.IDISK_MODE]))
10255
10256     if self.op.disk_template:
10257       r_shut = _ShutdownInstanceDisks(self, instance)
10258       if not r_shut:
10259         raise errors.OpExecError("Cannot shutdown instance disks, unable to"
10260                                  " proceed with disk template conversion")
10261       mode = (instance.disk_template, self.op.disk_template)
10262       try:
10263         self._DISK_CONVERSIONS[mode](self, feedback_fn)
10264       except:
10265         self.cfg.ReleaseDRBDMinors(instance.name)
10266         raise
10267       result.append(("disk_template", self.op.disk_template))
10268
10269     # NIC changes
10270     for nic_op, nic_dict in self.op.nics:
10271       if nic_op == constants.DDM_REMOVE:
10272         # remove the last nic
10273         del instance.nics[-1]
10274         result.append(("nic.%d" % len(instance.nics), "remove"))
10275       elif nic_op == constants.DDM_ADD:
10276         # mac and bridge should be set, by now
10277         mac = nic_dict[constants.INIC_MAC]
10278         ip = nic_dict.get(constants.INIC_IP, None)
10279         nicparams = self.nic_pinst[constants.DDM_ADD]
10280         new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
10281         instance.nics.append(new_nic)
10282         result.append(("nic.%d" % (len(instance.nics) - 1),
10283                        "add:mac=%s,ip=%s,mode=%s,link=%s" %
10284                        (new_nic.mac, new_nic.ip,
10285                         self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
10286                         self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
10287                        )))
10288       else:
10289         for key in (constants.INIC_MAC, constants.INIC_IP):
10290           if key in nic_dict:
10291             setattr(instance.nics[nic_op], key, nic_dict[key])
10292         if nic_op in self.nic_pinst:
10293           instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
10294         for key, val in nic_dict.iteritems():
10295           result.append(("nic.%s/%d" % (key, nic_op), val))
10296
10297     # hvparams changes
10298     if self.op.hvparams:
10299       instance.hvparams = self.hv_inst
10300       for key, val in self.op.hvparams.iteritems():
10301         result.append(("hv/%s" % key, val))
10302
10303     # beparams changes
10304     if self.op.beparams:
10305       instance.beparams = self.be_inst
10306       for key, val in self.op.beparams.iteritems():
10307         result.append(("be/%s" % key, val))
10308
10309     # OS change
10310     if self.op.os_name:
10311       instance.os = self.op.os_name
10312
10313     # osparams changes
10314     if self.op.osparams:
10315       instance.osparams = self.os_inst
10316       for key, val in self.op.osparams.iteritems():
10317         result.append(("os/%s" % key, val))
10318
10319     self.cfg.Update(instance, feedback_fn)
10320
10321     return result
10322
10323   _DISK_CONVERSIONS = {
10324     (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
10325     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
10326     }
10327
10328
10329 class LUBackupQuery(NoHooksLU):
10330   """Query the exports list
10331
10332   """
10333   REQ_BGL = False
10334
10335   def ExpandNames(self):
10336     self.needed_locks = {}
10337     self.share_locks[locking.LEVEL_NODE] = 1
10338     if not self.op.nodes:
10339       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10340     else:
10341       self.needed_locks[locking.LEVEL_NODE] = \
10342         _GetWantedNodes(self, self.op.nodes)
10343
10344   def Exec(self, feedback_fn):
10345     """Compute the list of all the exported system images.
10346
10347     @rtype: dict
10348     @return: a dictionary with the structure node->(export-list)
10349         where export-list is a list of the instances exported on
10350         that node.
10351
10352     """
10353     self.nodes = self.acquired_locks[locking.LEVEL_NODE]
10354     rpcresult = self.rpc.call_export_list(self.nodes)
10355     result = {}
10356     for node in rpcresult:
10357       if rpcresult[node].fail_msg:
10358         result[node] = False
10359       else:
10360         result[node] = rpcresult[node].payload
10361
10362     return result
10363
10364
10365 class LUBackupPrepare(NoHooksLU):
10366   """Prepares an instance for an export and returns useful information.
10367
10368   """
10369   REQ_BGL = False
10370
10371   def ExpandNames(self):
10372     self._ExpandAndLockInstance()
10373
10374   def CheckPrereq(self):
10375     """Check prerequisites.
10376
10377     """
10378     instance_name = self.op.instance_name
10379
10380     self.instance = self.cfg.GetInstanceInfo(instance_name)
10381     assert self.instance is not None, \
10382           "Cannot retrieve locked instance %s" % self.op.instance_name
10383     _CheckNodeOnline(self, self.instance.primary_node)
10384
10385     self._cds = _GetClusterDomainSecret()
10386
10387   def Exec(self, feedback_fn):
10388     """Prepares an instance for an export.
10389
10390     """
10391     instance = self.instance
10392
10393     if self.op.mode == constants.EXPORT_MODE_REMOTE:
10394       salt = utils.GenerateSecret(8)
10395
10396       feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
10397       result = self.rpc.call_x509_cert_create(instance.primary_node,
10398                                               constants.RIE_CERT_VALIDITY)
10399       result.Raise("Can't create X509 key and certificate on %s" % result.node)
10400
10401       (name, cert_pem) = result.payload
10402
10403       cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
10404                                              cert_pem)
10405
10406       return {
10407         "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
10408         "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
10409                           salt),
10410         "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
10411         }
10412
10413     return None
10414
10415
10416 class LUBackupExport(LogicalUnit):
10417   """Export an instance to an image in the cluster.
10418
10419   """
10420   HPATH = "instance-export"
10421   HTYPE = constants.HTYPE_INSTANCE
10422   REQ_BGL = False
10423
10424   def CheckArguments(self):
10425     """Check the arguments.
10426
10427     """
10428     self.x509_key_name = self.op.x509_key_name
10429     self.dest_x509_ca_pem = self.op.destination_x509_ca
10430
10431     if self.op.mode == constants.EXPORT_MODE_REMOTE:
10432       if not self.x509_key_name:
10433         raise errors.OpPrereqError("Missing X509 key name for encryption",
10434                                    errors.ECODE_INVAL)
10435
10436       if not self.dest_x509_ca_pem:
10437         raise errors.OpPrereqError("Missing destination X509 CA",
10438                                    errors.ECODE_INVAL)
10439
10440   def ExpandNames(self):
10441     self._ExpandAndLockInstance()
10442
10443     # Lock all nodes for local exports
10444     if self.op.mode == constants.EXPORT_MODE_LOCAL:
10445       # FIXME: lock only instance primary and destination node
10446       #
10447       # Sad but true, for now we have do lock all nodes, as we don't know where
10448       # the previous export might be, and in this LU we search for it and
10449       # remove it from its current node. In the future we could fix this by:
10450       #  - making a tasklet to search (share-lock all), then create the
10451       #    new one, then one to remove, after
10452       #  - removing the removal operation altogether
10453       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10454
10455   def DeclareLocks(self, level):
10456     """Last minute lock declaration."""
10457     # All nodes are locked anyway, so nothing to do here.
10458
10459   def BuildHooksEnv(self):
10460     """Build hooks env.
10461
10462     This will run on the master, primary node and target node.
10463
10464     """
10465     env = {
10466       "EXPORT_MODE": self.op.mode,
10467       "EXPORT_NODE": self.op.target_node,
10468       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
10469       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
10470       # TODO: Generic function for boolean env variables
10471       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
10472       }
10473
10474     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10475
10476     return env
10477
10478   def BuildHooksNodes(self):
10479     """Build hooks nodes.
10480
10481     """
10482     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
10483
10484     if self.op.mode == constants.EXPORT_MODE_LOCAL:
10485       nl.append(self.op.target_node)
10486
10487     return (nl, nl)
10488
10489   def CheckPrereq(self):
10490     """Check prerequisites.
10491
10492     This checks that the instance and node names are valid.
10493
10494     """
10495     instance_name = self.op.instance_name
10496
10497     self.instance = self.cfg.GetInstanceInfo(instance_name)
10498     assert self.instance is not None, \
10499           "Cannot retrieve locked instance %s" % self.op.instance_name
10500     _CheckNodeOnline(self, self.instance.primary_node)
10501
10502     if (self.op.remove_instance and self.instance.admin_up and
10503         not self.op.shutdown):
10504       raise errors.OpPrereqError("Can not remove instance without shutting it"
10505                                  " down before")
10506
10507     if self.op.mode == constants.EXPORT_MODE_LOCAL:
10508       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
10509       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
10510       assert self.dst_node is not None
10511
10512       _CheckNodeOnline(self, self.dst_node.name)
10513       _CheckNodeNotDrained(self, self.dst_node.name)
10514
10515       self._cds = None
10516       self.dest_disk_info = None
10517       self.dest_x509_ca = None
10518
10519     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10520       self.dst_node = None
10521
10522       if len(self.op.target_node) != len(self.instance.disks):
10523         raise errors.OpPrereqError(("Received destination information for %s"
10524                                     " disks, but instance %s has %s disks") %
10525                                    (len(self.op.target_node), instance_name,
10526                                     len(self.instance.disks)),
10527                                    errors.ECODE_INVAL)
10528
10529       cds = _GetClusterDomainSecret()
10530
10531       # Check X509 key name
10532       try:
10533         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
10534       except (TypeError, ValueError), err:
10535         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
10536
10537       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
10538         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
10539                                    errors.ECODE_INVAL)
10540
10541       # Load and verify CA
10542       try:
10543         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
10544       except OpenSSL.crypto.Error, err:
10545         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
10546                                    (err, ), errors.ECODE_INVAL)
10547
10548       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10549       if errcode is not None:
10550         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
10551                                    (msg, ), errors.ECODE_INVAL)
10552
10553       self.dest_x509_ca = cert
10554
10555       # Verify target information
10556       disk_info = []
10557       for idx, disk_data in enumerate(self.op.target_node):
10558         try:
10559           (host, port, magic) = \
10560             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
10561         except errors.GenericError, err:
10562           raise errors.OpPrereqError("Target info for disk %s: %s" %
10563                                      (idx, err), errors.ECODE_INVAL)
10564
10565         disk_info.append((host, port, magic))
10566
10567       assert len(disk_info) == len(self.op.target_node)
10568       self.dest_disk_info = disk_info
10569
10570     else:
10571       raise errors.ProgrammerError("Unhandled export mode %r" %
10572                                    self.op.mode)
10573
10574     # instance disk type verification
10575     # TODO: Implement export support for file-based disks
10576     for disk in self.instance.disks:
10577       if disk.dev_type == constants.LD_FILE:
10578         raise errors.OpPrereqError("Export not supported for instances with"
10579                                    " file-based disks", errors.ECODE_INVAL)
10580
10581   def _CleanupExports(self, feedback_fn):
10582     """Removes exports of current instance from all other nodes.
10583
10584     If an instance in a cluster with nodes A..D was exported to node C, its
10585     exports will be removed from the nodes A, B and D.
10586
10587     """
10588     assert self.op.mode != constants.EXPORT_MODE_REMOTE
10589
10590     nodelist = self.cfg.GetNodeList()
10591     nodelist.remove(self.dst_node.name)
10592
10593     # on one-node clusters nodelist will be empty after the removal
10594     # if we proceed the backup would be removed because OpBackupQuery
10595     # substitutes an empty list with the full cluster node list.
10596     iname = self.instance.name
10597     if nodelist:
10598       feedback_fn("Removing old exports for instance %s" % iname)
10599       exportlist = self.rpc.call_export_list(nodelist)
10600       for node in exportlist:
10601         if exportlist[node].fail_msg:
10602           continue
10603         if iname in exportlist[node].payload:
10604           msg = self.rpc.call_export_remove(node, iname).fail_msg
10605           if msg:
10606             self.LogWarning("Could not remove older export for instance %s"
10607                             " on node %s: %s", iname, node, msg)
10608
10609   def Exec(self, feedback_fn):
10610     """Export an instance to an image in the cluster.
10611
10612     """
10613     assert self.op.mode in constants.EXPORT_MODES
10614
10615     instance = self.instance
10616     src_node = instance.primary_node
10617
10618     if self.op.shutdown:
10619       # shutdown the instance, but not the disks
10620       feedback_fn("Shutting down instance %s" % instance.name)
10621       result = self.rpc.call_instance_shutdown(src_node, instance,
10622                                                self.op.shutdown_timeout)
10623       # TODO: Maybe ignore failures if ignore_remove_failures is set
10624       result.Raise("Could not shutdown instance %s on"
10625                    " node %s" % (instance.name, src_node))
10626
10627     # set the disks ID correctly since call_instance_start needs the
10628     # correct drbd minor to create the symlinks
10629     for disk in instance.disks:
10630       self.cfg.SetDiskID(disk, src_node)
10631
10632     activate_disks = (not instance.admin_up)
10633
10634     if activate_disks:
10635       # Activate the instance disks if we'exporting a stopped instance
10636       feedback_fn("Activating disks for %s" % instance.name)
10637       _StartInstanceDisks(self, instance, None)
10638
10639     try:
10640       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
10641                                                      instance)
10642
10643       helper.CreateSnapshots()
10644       try:
10645         if (self.op.shutdown and instance.admin_up and
10646             not self.op.remove_instance):
10647           assert not activate_disks
10648           feedback_fn("Starting instance %s" % instance.name)
10649           result = self.rpc.call_instance_start(src_node, instance, None, None)
10650           msg = result.fail_msg
10651           if msg:
10652             feedback_fn("Failed to start instance: %s" % msg)
10653             _ShutdownInstanceDisks(self, instance)
10654             raise errors.OpExecError("Could not start instance: %s" % msg)
10655
10656         if self.op.mode == constants.EXPORT_MODE_LOCAL:
10657           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
10658         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10659           connect_timeout = constants.RIE_CONNECT_TIMEOUT
10660           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10661
10662           (key_name, _, _) = self.x509_key_name
10663
10664           dest_ca_pem = \
10665             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
10666                                             self.dest_x509_ca)
10667
10668           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
10669                                                      key_name, dest_ca_pem,
10670                                                      timeouts)
10671       finally:
10672         helper.Cleanup()
10673
10674       # Check for backwards compatibility
10675       assert len(dresults) == len(instance.disks)
10676       assert compat.all(isinstance(i, bool) for i in dresults), \
10677              "Not all results are boolean: %r" % dresults
10678
10679     finally:
10680       if activate_disks:
10681         feedback_fn("Deactivating disks for %s" % instance.name)
10682         _ShutdownInstanceDisks(self, instance)
10683
10684     if not (compat.all(dresults) and fin_resu):
10685       failures = []
10686       if not fin_resu:
10687         failures.append("export finalization")
10688       if not compat.all(dresults):
10689         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
10690                                if not dsk)
10691         failures.append("disk export: disk(s) %s" % fdsk)
10692
10693       raise errors.OpExecError("Export failed, errors in %s" %
10694                                utils.CommaJoin(failures))
10695
10696     # At this point, the export was successful, we can cleanup/finish
10697
10698     # Remove instance if requested
10699     if self.op.remove_instance:
10700       feedback_fn("Removing instance %s" % instance.name)
10701       _RemoveInstance(self, feedback_fn, instance,
10702                       self.op.ignore_remove_failures)
10703
10704     if self.op.mode == constants.EXPORT_MODE_LOCAL:
10705       self._CleanupExports(feedback_fn)
10706
10707     return fin_resu, dresults
10708
10709
10710 class LUBackupRemove(NoHooksLU):
10711   """Remove exports related to the named instance.
10712
10713   """
10714   REQ_BGL = False
10715
10716   def ExpandNames(self):
10717     self.needed_locks = {}
10718     # We need all nodes to be locked in order for RemoveExport to work, but we
10719     # don't need to lock the instance itself, as nothing will happen to it (and
10720     # we can remove exports also for a removed instance)
10721     self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10722
10723   def Exec(self, feedback_fn):
10724     """Remove any export.
10725
10726     """
10727     instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
10728     # If the instance was not found we'll try with the name that was passed in.
10729     # This will only work if it was an FQDN, though.
10730     fqdn_warn = False
10731     if not instance_name:
10732       fqdn_warn = True
10733       instance_name = self.op.instance_name
10734
10735     locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
10736     exportlist = self.rpc.call_export_list(locked_nodes)
10737     found = False
10738     for node in exportlist:
10739       msg = exportlist[node].fail_msg
10740       if msg:
10741         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
10742         continue
10743       if instance_name in exportlist[node].payload:
10744         found = True
10745         result = self.rpc.call_export_remove(node, instance_name)
10746         msg = result.fail_msg
10747         if msg:
10748           logging.error("Could not remove export for instance %s"
10749                         " on node %s: %s", instance_name, node, msg)
10750
10751     if fqdn_warn and not found:
10752       feedback_fn("Export not found. If trying to remove an export belonging"
10753                   " to a deleted instance please use its Fully Qualified"
10754                   " Domain Name.")
10755
10756
10757 class LUGroupAdd(LogicalUnit):
10758   """Logical unit for creating node groups.
10759
10760   """
10761   HPATH = "group-add"
10762   HTYPE = constants.HTYPE_GROUP
10763   REQ_BGL = False
10764
10765   def ExpandNames(self):
10766     # We need the new group's UUID here so that we can create and acquire the
10767     # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
10768     # that it should not check whether the UUID exists in the configuration.
10769     self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
10770     self.needed_locks = {}
10771     self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
10772
10773   def CheckPrereq(self):
10774     """Check prerequisites.
10775
10776     This checks that the given group name is not an existing node group
10777     already.
10778
10779     """
10780     try:
10781       existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10782     except errors.OpPrereqError:
10783       pass
10784     else:
10785       raise errors.OpPrereqError("Desired group name '%s' already exists as a"
10786                                  " node group (UUID: %s)" %
10787                                  (self.op.group_name, existing_uuid),
10788                                  errors.ECODE_EXISTS)
10789
10790     if self.op.ndparams:
10791       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
10792
10793   def BuildHooksEnv(self):
10794     """Build hooks env.
10795
10796     """
10797     return {
10798       "GROUP_NAME": self.op.group_name,
10799       }
10800
10801   def BuildHooksNodes(self):
10802     """Build hooks nodes.
10803
10804     """
10805     mn = self.cfg.GetMasterNode()
10806     return ([mn], [mn])
10807
10808   def Exec(self, feedback_fn):
10809     """Add the node group to the cluster.
10810
10811     """
10812     group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
10813                                   uuid=self.group_uuid,
10814                                   alloc_policy=self.op.alloc_policy,
10815                                   ndparams=self.op.ndparams)
10816
10817     self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
10818     del self.remove_locks[locking.LEVEL_NODEGROUP]
10819
10820
10821 class LUGroupAssignNodes(NoHooksLU):
10822   """Logical unit for assigning nodes to groups.
10823
10824   """
10825   REQ_BGL = False
10826
10827   def ExpandNames(self):
10828     # These raise errors.OpPrereqError on their own:
10829     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
10830     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
10831
10832     # We want to lock all the affected nodes and groups. We have readily
10833     # available the list of nodes, and the *destination* group. To gather the
10834     # list of "source" groups, we need to fetch node information.
10835     self.node_data = self.cfg.GetAllNodesInfo()
10836     affected_groups = set(self.node_data[node].group for node in self.op.nodes)
10837     affected_groups.add(self.group_uuid)
10838
10839     self.needed_locks = {
10840       locking.LEVEL_NODEGROUP: list(affected_groups),
10841       locking.LEVEL_NODE: self.op.nodes,
10842       }
10843
10844   def CheckPrereq(self):
10845     """Check prerequisites.
10846
10847     """
10848     self.group = self.cfg.GetNodeGroup(self.group_uuid)
10849     instance_data = self.cfg.GetAllInstancesInfo()
10850
10851     if self.group is None:
10852       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
10853                                (self.op.group_name, self.group_uuid))
10854
10855     (new_splits, previous_splits) = \
10856       self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
10857                                              for node in self.op.nodes],
10858                                             self.node_data, instance_data)
10859
10860     if new_splits:
10861       fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
10862
10863       if not self.op.force:
10864         raise errors.OpExecError("The following instances get split by this"
10865                                  " change and --force was not given: %s" %
10866                                  fmt_new_splits)
10867       else:
10868         self.LogWarning("This operation will split the following instances: %s",
10869                         fmt_new_splits)
10870
10871         if previous_splits:
10872           self.LogWarning("In addition, these already-split instances continue"
10873                           " to be split across groups: %s",
10874                           utils.CommaJoin(utils.NiceSort(previous_splits)))
10875
10876   def Exec(self, feedback_fn):
10877     """Assign nodes to a new group.
10878
10879     """
10880     for node in self.op.nodes:
10881       self.node_data[node].group = self.group_uuid
10882
10883     self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
10884
10885   @staticmethod
10886   def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
10887     """Check for split instances after a node assignment.
10888
10889     This method considers a series of node assignments as an atomic operation,
10890     and returns information about split instances after applying the set of
10891     changes.
10892
10893     In particular, it returns information about newly split instances, and
10894     instances that were already split, and remain so after the change.
10895
10896     Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
10897     considered.
10898
10899     @type changes: list of (node_name, new_group_uuid) pairs.
10900     @param changes: list of node assignments to consider.
10901     @param node_data: a dict with data for all nodes
10902     @param instance_data: a dict with all instances to consider
10903     @rtype: a two-tuple
10904     @return: a list of instances that were previously okay and result split as a
10905       consequence of this change, and a list of instances that were previously
10906       split and this change does not fix.
10907
10908     """
10909     changed_nodes = dict((node, group) for node, group in changes
10910                          if node_data[node].group != group)
10911
10912     all_split_instances = set()
10913     previously_split_instances = set()
10914
10915     def InstanceNodes(instance):
10916       return [instance.primary_node] + list(instance.secondary_nodes)
10917
10918     for inst in instance_data.values():
10919       if inst.disk_template not in constants.DTS_INT_MIRROR:
10920         continue
10921
10922       instance_nodes = InstanceNodes(inst)
10923
10924       if len(set(node_data[node].group for node in instance_nodes)) > 1:
10925         previously_split_instances.add(inst.name)
10926
10927       if len(set(changed_nodes.get(node, node_data[node].group)
10928                  for node in instance_nodes)) > 1:
10929         all_split_instances.add(inst.name)
10930
10931     return (list(all_split_instances - previously_split_instances),
10932             list(previously_split_instances & all_split_instances))
10933
10934
10935 class _GroupQuery(_QueryBase):
10936   FIELDS = query.GROUP_FIELDS
10937
10938   def ExpandNames(self, lu):
10939     lu.needed_locks = {}
10940
10941     self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
10942     name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
10943
10944     if not self.names:
10945       self.wanted = [name_to_uuid[name]
10946                      for name in utils.NiceSort(name_to_uuid.keys())]
10947     else:
10948       # Accept names to be either names or UUIDs.
10949       missing = []
10950       self.wanted = []
10951       all_uuid = frozenset(self._all_groups.keys())
10952
10953       for name in self.names:
10954         if name in all_uuid:
10955           self.wanted.append(name)
10956         elif name in name_to_uuid:
10957           self.wanted.append(name_to_uuid[name])
10958         else:
10959           missing.append(name)
10960
10961       if missing:
10962         raise errors.OpPrereqError("Some groups do not exist: %s" %
10963                                    utils.CommaJoin(missing),
10964                                    errors.ECODE_NOENT)
10965
10966   def DeclareLocks(self, lu, level):
10967     pass
10968
10969   def _GetQueryData(self, lu):
10970     """Computes the list of node groups and their attributes.
10971
10972     """
10973     do_nodes = query.GQ_NODE in self.requested_data
10974     do_instances = query.GQ_INST in self.requested_data
10975
10976     group_to_nodes = None
10977     group_to_instances = None
10978
10979     # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
10980     # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
10981     # latter GetAllInstancesInfo() is not enough, for we have to go through
10982     # instance->node. Hence, we will need to process nodes even if we only need
10983     # instance information.
10984     if do_nodes or do_instances:
10985       all_nodes = lu.cfg.GetAllNodesInfo()
10986       group_to_nodes = dict((uuid, []) for uuid in self.wanted)
10987       node_to_group = {}
10988
10989       for node in all_nodes.values():
10990         if node.group in group_to_nodes:
10991           group_to_nodes[node.group].append(node.name)
10992           node_to_group[node.name] = node.group
10993
10994       if do_instances:
10995         all_instances = lu.cfg.GetAllInstancesInfo()
10996         group_to_instances = dict((uuid, []) for uuid in self.wanted)
10997
10998         for instance in all_instances.values():
10999           node = instance.primary_node
11000           if node in node_to_group:
11001             group_to_instances[node_to_group[node]].append(instance.name)
11002
11003         if not do_nodes:
11004           # Do not pass on node information if it was not requested.
11005           group_to_nodes = None
11006
11007     return query.GroupQueryData([self._all_groups[uuid]
11008                                  for uuid in self.wanted],
11009                                 group_to_nodes, group_to_instances)
11010
11011
11012 class LUGroupQuery(NoHooksLU):
11013   """Logical unit for querying node groups.
11014
11015   """
11016   REQ_BGL = False
11017
11018   def CheckArguments(self):
11019     self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
11020                           self.op.output_fields, False)
11021
11022   def ExpandNames(self):
11023     self.gq.ExpandNames(self)
11024
11025   def Exec(self, feedback_fn):
11026     return self.gq.OldStyleQuery(self)
11027
11028
11029 class LUGroupSetParams(LogicalUnit):
11030   """Modifies the parameters of a node group.
11031
11032   """
11033   HPATH = "group-modify"
11034   HTYPE = constants.HTYPE_GROUP
11035   REQ_BGL = False
11036
11037   def CheckArguments(self):
11038     all_changes = [
11039       self.op.ndparams,
11040       self.op.alloc_policy,
11041       ]
11042
11043     if all_changes.count(None) == len(all_changes):
11044       raise errors.OpPrereqError("Please pass at least one modification",
11045                                  errors.ECODE_INVAL)
11046
11047   def ExpandNames(self):
11048     # This raises errors.OpPrereqError on its own:
11049     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11050
11051     self.needed_locks = {
11052       locking.LEVEL_NODEGROUP: [self.group_uuid],
11053       }
11054
11055   def CheckPrereq(self):
11056     """Check prerequisites.
11057
11058     """
11059     self.group = self.cfg.GetNodeGroup(self.group_uuid)
11060
11061     if self.group is None:
11062       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11063                                (self.op.group_name, self.group_uuid))
11064
11065     if self.op.ndparams:
11066       new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
11067       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11068       self.new_ndparams = new_ndparams
11069
11070   def BuildHooksEnv(self):
11071     """Build hooks env.
11072
11073     """
11074     return {
11075       "GROUP_NAME": self.op.group_name,
11076       "NEW_ALLOC_POLICY": self.op.alloc_policy,
11077       }
11078
11079   def BuildHooksNodes(self):
11080     """Build hooks nodes.
11081
11082     """
11083     mn = self.cfg.GetMasterNode()
11084     return ([mn], [mn])
11085
11086   def Exec(self, feedback_fn):
11087     """Modifies the node group.
11088
11089     """
11090     result = []
11091
11092     if self.op.ndparams:
11093       self.group.ndparams = self.new_ndparams
11094       result.append(("ndparams", str(self.group.ndparams)))
11095
11096     if self.op.alloc_policy:
11097       self.group.alloc_policy = self.op.alloc_policy
11098
11099     self.cfg.Update(self.group, feedback_fn)
11100     return result
11101
11102
11103
11104 class LUGroupRemove(LogicalUnit):
11105   HPATH = "group-remove"
11106   HTYPE = constants.HTYPE_GROUP
11107   REQ_BGL = False
11108
11109   def ExpandNames(self):
11110     # This will raises errors.OpPrereqError on its own:
11111     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11112     self.needed_locks = {
11113       locking.LEVEL_NODEGROUP: [self.group_uuid],
11114       }
11115
11116   def CheckPrereq(self):
11117     """Check prerequisites.
11118
11119     This checks that the given group name exists as a node group, that is
11120     empty (i.e., contains no nodes), and that is not the last group of the
11121     cluster.
11122
11123     """
11124     # Verify that the group is empty.
11125     group_nodes = [node.name
11126                    for node in self.cfg.GetAllNodesInfo().values()
11127                    if node.group == self.group_uuid]
11128
11129     if group_nodes:
11130       raise errors.OpPrereqError("Group '%s' not empty, has the following"
11131                                  " nodes: %s" %
11132                                  (self.op.group_name,
11133                                   utils.CommaJoin(utils.NiceSort(group_nodes))),
11134                                  errors.ECODE_STATE)
11135
11136     # Verify the cluster would not be left group-less.
11137     if len(self.cfg.GetNodeGroupList()) == 1:
11138       raise errors.OpPrereqError("Group '%s' is the only group,"
11139                                  " cannot be removed" %
11140                                  self.op.group_name,
11141                                  errors.ECODE_STATE)
11142
11143   def BuildHooksEnv(self):
11144     """Build hooks env.
11145
11146     """
11147     return {
11148       "GROUP_NAME": self.op.group_name,
11149       }
11150
11151   def BuildHooksNodes(self):
11152     """Build hooks nodes.
11153
11154     """
11155     mn = self.cfg.GetMasterNode()
11156     return ([mn], [mn])
11157
11158   def Exec(self, feedback_fn):
11159     """Remove the node group.
11160
11161     """
11162     try:
11163       self.cfg.RemoveNodeGroup(self.group_uuid)
11164     except errors.ConfigurationError:
11165       raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
11166                                (self.op.group_name, self.group_uuid))
11167
11168     self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11169
11170
11171 class LUGroupRename(LogicalUnit):
11172   HPATH = "group-rename"
11173   HTYPE = constants.HTYPE_GROUP
11174   REQ_BGL = False
11175
11176   def ExpandNames(self):
11177     # This raises errors.OpPrereqError on its own:
11178     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11179
11180     self.needed_locks = {
11181       locking.LEVEL_NODEGROUP: [self.group_uuid],
11182       }
11183
11184   def CheckPrereq(self):
11185     """Check prerequisites.
11186
11187     Ensures requested new name is not yet used.
11188
11189     """
11190     try:
11191       new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
11192     except errors.OpPrereqError:
11193       pass
11194     else:
11195       raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
11196                                  " node group (UUID: %s)" %
11197                                  (self.op.new_name, new_name_uuid),
11198                                  errors.ECODE_EXISTS)
11199
11200   def BuildHooksEnv(self):
11201     """Build hooks env.
11202
11203     """
11204     return {
11205       "OLD_NAME": self.op.group_name,
11206       "NEW_NAME": self.op.new_name,
11207       }
11208
11209   def BuildHooksNodes(self):
11210     """Build hooks nodes.
11211
11212     """
11213     mn = self.cfg.GetMasterNode()
11214
11215     all_nodes = self.cfg.GetAllNodesInfo()
11216     all_nodes.pop(mn, None)
11217
11218     run_nodes = [mn]
11219     run_nodes.extend(node.name for node in all_nodes.values()
11220                      if node.group == self.group_uuid)
11221
11222     return (run_nodes, run_nodes)
11223
11224   def Exec(self, feedback_fn):
11225     """Rename the node group.
11226
11227     """
11228     group = self.cfg.GetNodeGroup(self.group_uuid)
11229
11230     if group is None:
11231       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11232                                (self.op.group_name, self.group_uuid))
11233
11234     group.name = self.op.new_name
11235     self.cfg.Update(group, feedback_fn)
11236
11237     return self.op.new_name
11238
11239
11240 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
11241   """Generic tags LU.
11242
11243   This is an abstract class which is the parent of all the other tags LUs.
11244
11245   """
11246   def ExpandNames(self):
11247     self.group_uuid = None
11248     self.needed_locks = {}
11249     if self.op.kind == constants.TAG_NODE:
11250       self.op.name = _ExpandNodeName(self.cfg, self.op.name)
11251       self.needed_locks[locking.LEVEL_NODE] = self.op.name
11252     elif self.op.kind == constants.TAG_INSTANCE:
11253       self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
11254       self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
11255     elif self.op.kind == constants.TAG_NODEGROUP:
11256       self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
11257
11258     # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
11259     # not possible to acquire the BGL based on opcode parameters)
11260
11261   def CheckPrereq(self):
11262     """Check prerequisites.
11263
11264     """
11265     if self.op.kind == constants.TAG_CLUSTER:
11266       self.target = self.cfg.GetClusterInfo()
11267     elif self.op.kind == constants.TAG_NODE:
11268       self.target = self.cfg.GetNodeInfo(self.op.name)
11269     elif self.op.kind == constants.TAG_INSTANCE:
11270       self.target = self.cfg.GetInstanceInfo(self.op.name)
11271     elif self.op.kind == constants.TAG_NODEGROUP:
11272       self.target = self.cfg.GetNodeGroup(self.group_uuid)
11273     else:
11274       raise errors.OpPrereqError("Wrong tag type requested (%s)" %
11275                                  str(self.op.kind), errors.ECODE_INVAL)
11276
11277
11278 class LUTagsGet(TagsLU):
11279   """Returns the tags of a given object.
11280
11281   """
11282   REQ_BGL = False
11283
11284   def ExpandNames(self):
11285     TagsLU.ExpandNames(self)
11286
11287     # Share locks as this is only a read operation
11288     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
11289
11290   def Exec(self, feedback_fn):
11291     """Returns the tag list.
11292
11293     """
11294     return list(self.target.GetTags())
11295
11296
11297 class LUTagsSearch(NoHooksLU):
11298   """Searches the tags for a given pattern.
11299
11300   """
11301   REQ_BGL = False
11302
11303   def ExpandNames(self):
11304     self.needed_locks = {}
11305
11306   def CheckPrereq(self):
11307     """Check prerequisites.
11308
11309     This checks the pattern passed for validity by compiling it.
11310
11311     """
11312     try:
11313       self.re = re.compile(self.op.pattern)
11314     except re.error, err:
11315       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
11316                                  (self.op.pattern, err), errors.ECODE_INVAL)
11317
11318   def Exec(self, feedback_fn):
11319     """Returns the tag list.
11320
11321     """
11322     cfg = self.cfg
11323     tgts = [("/cluster", cfg.GetClusterInfo())]
11324     ilist = cfg.GetAllInstancesInfo().values()
11325     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
11326     nlist = cfg.GetAllNodesInfo().values()
11327     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
11328     tgts.extend(("/nodegroup/%s" % n.name, n)
11329                 for n in cfg.GetAllNodeGroupsInfo().values())
11330     results = []
11331     for path, target in tgts:
11332       for tag in target.GetTags():
11333         if self.re.search(tag):
11334           results.append((path, tag))
11335     return results
11336
11337
11338 class LUTagsSet(TagsLU):
11339   """Sets a tag on a given object.
11340
11341   """
11342   REQ_BGL = False
11343
11344   def CheckPrereq(self):
11345     """Check prerequisites.
11346
11347     This checks the type and length of the tag name and value.
11348
11349     """
11350     TagsLU.CheckPrereq(self)
11351     for tag in self.op.tags:
11352       objects.TaggableObject.ValidateTag(tag)
11353
11354   def Exec(self, feedback_fn):
11355     """Sets the tag.
11356
11357     """
11358     try:
11359       for tag in self.op.tags:
11360         self.target.AddTag(tag)
11361     except errors.TagError, err:
11362       raise errors.OpExecError("Error while setting tag: %s" % str(err))
11363     self.cfg.Update(self.target, feedback_fn)
11364
11365
11366 class LUTagsDel(TagsLU):
11367   """Delete a list of tags from a given object.
11368
11369   """
11370   REQ_BGL = False
11371
11372   def CheckPrereq(self):
11373     """Check prerequisites.
11374
11375     This checks that we have the given tag.
11376
11377     """
11378     TagsLU.CheckPrereq(self)
11379     for tag in self.op.tags:
11380       objects.TaggableObject.ValidateTag(tag)
11381     del_tags = frozenset(self.op.tags)
11382     cur_tags = self.target.GetTags()
11383
11384     diff_tags = del_tags - cur_tags
11385     if diff_tags:
11386       diff_names = ("'%s'" % i for i in sorted(diff_tags))
11387       raise errors.OpPrereqError("Tag(s) %s not found" %
11388                                  (utils.CommaJoin(diff_names), ),
11389                                  errors.ECODE_NOENT)
11390
11391   def Exec(self, feedback_fn):
11392     """Remove the tag from the object.
11393
11394     """
11395     for tag in self.op.tags:
11396       self.target.RemoveTag(tag)
11397     self.cfg.Update(self.target, feedback_fn)
11398
11399
11400 class LUTestDelay(NoHooksLU):
11401   """Sleep for a specified amount of time.
11402
11403   This LU sleeps on the master and/or nodes for a specified amount of
11404   time.
11405
11406   """
11407   REQ_BGL = False
11408
11409   def ExpandNames(self):
11410     """Expand names and set required locks.
11411
11412     This expands the node list, if any.
11413
11414     """
11415     self.needed_locks = {}
11416     if self.op.on_nodes:
11417       # _GetWantedNodes can be used here, but is not always appropriate to use
11418       # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
11419       # more information.
11420       self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
11421       self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
11422
11423   def _TestDelay(self):
11424     """Do the actual sleep.
11425
11426     """
11427     if self.op.on_master:
11428       if not utils.TestDelay(self.op.duration):
11429         raise errors.OpExecError("Error during master delay test")
11430     if self.op.on_nodes:
11431       result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
11432       for node, node_result in result.items():
11433         node_result.Raise("Failure during rpc call to node %s" % node)
11434
11435   def Exec(self, feedback_fn):
11436     """Execute the test delay opcode, with the wanted repetitions.
11437
11438     """
11439     if self.op.repeat == 0:
11440       self._TestDelay()
11441     else:
11442       top_value = self.op.repeat - 1
11443       for i in range(self.op.repeat):
11444         self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
11445         self._TestDelay()
11446
11447
11448 class LUTestJqueue(NoHooksLU):
11449   """Utility LU to test some aspects of the job queue.
11450
11451   """
11452   REQ_BGL = False
11453
11454   # Must be lower than default timeout for WaitForJobChange to see whether it
11455   # notices changed jobs
11456   _CLIENT_CONNECT_TIMEOUT = 20.0
11457   _CLIENT_CONFIRM_TIMEOUT = 60.0
11458
11459   @classmethod
11460   def _NotifyUsingSocket(cls, cb, errcls):
11461     """Opens a Unix socket and waits for another program to connect.
11462
11463     @type cb: callable
11464     @param cb: Callback to send socket name to client
11465     @type errcls: class
11466     @param errcls: Exception class to use for errors
11467
11468     """
11469     # Using a temporary directory as there's no easy way to create temporary
11470     # sockets without writing a custom loop around tempfile.mktemp and
11471     # socket.bind
11472     tmpdir = tempfile.mkdtemp()
11473     try:
11474       tmpsock = utils.PathJoin(tmpdir, "sock")
11475
11476       logging.debug("Creating temporary socket at %s", tmpsock)
11477       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
11478       try:
11479         sock.bind(tmpsock)
11480         sock.listen(1)
11481
11482         # Send details to client
11483         cb(tmpsock)
11484
11485         # Wait for client to connect before continuing
11486         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
11487         try:
11488           (conn, _) = sock.accept()
11489         except socket.error, err:
11490           raise errcls("Client didn't connect in time (%s)" % err)
11491       finally:
11492         sock.close()
11493     finally:
11494       # Remove as soon as client is connected
11495       shutil.rmtree(tmpdir)
11496
11497     # Wait for client to close
11498     try:
11499       try:
11500         # pylint: disable-msg=E1101
11501         # Instance of '_socketobject' has no ... member
11502         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
11503         conn.recv(1)
11504       except socket.error, err:
11505         raise errcls("Client failed to confirm notification (%s)" % err)
11506     finally:
11507       conn.close()
11508
11509   def _SendNotification(self, test, arg, sockname):
11510     """Sends a notification to the client.
11511
11512     @type test: string
11513     @param test: Test name
11514     @param arg: Test argument (depends on test)
11515     @type sockname: string
11516     @param sockname: Socket path
11517
11518     """
11519     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
11520
11521   def _Notify(self, prereq, test, arg):
11522     """Notifies the client of a test.
11523
11524     @type prereq: bool
11525     @param prereq: Whether this is a prereq-phase test
11526     @type test: string
11527     @param test: Test name
11528     @param arg: Test argument (depends on test)
11529
11530     """
11531     if prereq:
11532       errcls = errors.OpPrereqError
11533     else:
11534       errcls = errors.OpExecError
11535
11536     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
11537                                                   test, arg),
11538                                    errcls)
11539
11540   def CheckArguments(self):
11541     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
11542     self.expandnames_calls = 0
11543
11544   def ExpandNames(self):
11545     checkargs_calls = getattr(self, "checkargs_calls", 0)
11546     if checkargs_calls < 1:
11547       raise errors.ProgrammerError("CheckArguments was not called")
11548
11549     self.expandnames_calls += 1
11550
11551     if self.op.notify_waitlock:
11552       self._Notify(True, constants.JQT_EXPANDNAMES, None)
11553
11554     self.LogInfo("Expanding names")
11555
11556     # Get lock on master node (just to get a lock, not for a particular reason)
11557     self.needed_locks = {
11558       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
11559       }
11560
11561   def Exec(self, feedback_fn):
11562     if self.expandnames_calls < 1:
11563       raise errors.ProgrammerError("ExpandNames was not called")
11564
11565     if self.op.notify_exec:
11566       self._Notify(False, constants.JQT_EXEC, None)
11567
11568     self.LogInfo("Executing")
11569
11570     if self.op.log_messages:
11571       self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
11572       for idx, msg in enumerate(self.op.log_messages):
11573         self.LogInfo("Sending log message %s", idx + 1)
11574         feedback_fn(constants.JQT_MSGPREFIX + msg)
11575         # Report how many test messages have been sent
11576         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
11577
11578     if self.op.fail:
11579       raise errors.OpExecError("Opcode failure was requested")
11580
11581     return True
11582
11583
11584 class IAllocator(object):
11585   """IAllocator framework.
11586
11587   An IAllocator instance has three sets of attributes:
11588     - cfg that is needed to query the cluster
11589     - input data (all members of the _KEYS class attribute are required)
11590     - four buffer attributes (in|out_data|text), that represent the
11591       input (to the external script) in text and data structure format,
11592       and the output from it, again in two formats
11593     - the result variables from the script (success, info, nodes) for
11594       easy usage
11595
11596   """
11597   # pylint: disable-msg=R0902
11598   # lots of instance attributes
11599   _ALLO_KEYS = [
11600     "name", "mem_size", "disks", "disk_template",
11601     "os", "tags", "nics", "vcpus", "hypervisor",
11602     ]
11603   _RELO_KEYS = [
11604     "name", "relocate_from",
11605     ]
11606   _EVAC_KEYS = [
11607     "evac_nodes",
11608     ]
11609
11610   def __init__(self, cfg, rpc, mode, **kwargs):
11611     self.cfg = cfg
11612     self.rpc = rpc
11613     # init buffer variables
11614     self.in_text = self.out_text = self.in_data = self.out_data = None
11615     # init all input fields so that pylint is happy
11616     self.mode = mode
11617     self.mem_size = self.disks = self.disk_template = None
11618     self.os = self.tags = self.nics = self.vcpus = None
11619     self.hypervisor = None
11620     self.relocate_from = None
11621     self.name = None
11622     self.evac_nodes = None
11623     # computed fields
11624     self.required_nodes = None
11625     # init result fields
11626     self.success = self.info = self.result = None
11627     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
11628       keyset = self._ALLO_KEYS
11629       fn = self._AddNewInstance
11630     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
11631       keyset = self._RELO_KEYS
11632       fn = self._AddRelocateInstance
11633     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
11634       keyset = self._EVAC_KEYS
11635       fn = self._AddEvacuateNodes
11636     else:
11637       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
11638                                    " IAllocator" % self.mode)
11639     for key in kwargs:
11640       if key not in keyset:
11641         raise errors.ProgrammerError("Invalid input parameter '%s' to"
11642                                      " IAllocator" % key)
11643       setattr(self, key, kwargs[key])
11644
11645     for key in keyset:
11646       if key not in kwargs:
11647         raise errors.ProgrammerError("Missing input parameter '%s' to"
11648                                      " IAllocator" % key)
11649     self._BuildInputData(fn)
11650
11651   def _ComputeClusterData(self):
11652     """Compute the generic allocator input data.
11653
11654     This is the data that is independent of the actual operation.
11655
11656     """
11657     cfg = self.cfg
11658     cluster_info = cfg.GetClusterInfo()
11659     # cluster data
11660     data = {
11661       "version": constants.IALLOCATOR_VERSION,
11662       "cluster_name": cfg.GetClusterName(),
11663       "cluster_tags": list(cluster_info.GetTags()),
11664       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
11665       # we don't have job IDs
11666       }
11667     ninfo = cfg.GetAllNodesInfo()
11668     iinfo = cfg.GetAllInstancesInfo().values()
11669     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
11670
11671     # node data
11672     node_list = [n.name for n in ninfo.values() if n.vm_capable]
11673
11674     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
11675       hypervisor_name = self.hypervisor
11676     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
11677       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
11678     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
11679       hypervisor_name = cluster_info.enabled_hypervisors[0]
11680
11681     node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
11682                                         hypervisor_name)
11683     node_iinfo = \
11684       self.rpc.call_all_instances_info(node_list,
11685                                        cluster_info.enabled_hypervisors)
11686
11687     data["nodegroups"] = self._ComputeNodeGroupData(cfg)
11688
11689     config_ndata = self._ComputeBasicNodeData(ninfo)
11690     data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
11691                                                  i_list, config_ndata)
11692     assert len(data["nodes"]) == len(ninfo), \
11693         "Incomplete node data computed"
11694
11695     data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
11696
11697     self.in_data = data
11698
11699   @staticmethod
11700   def _ComputeNodeGroupData(cfg):
11701     """Compute node groups data.
11702
11703     """
11704     ng = {}
11705     for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
11706       ng[guuid] = {
11707         "name": gdata.name,
11708         "alloc_policy": gdata.alloc_policy,
11709         }
11710     return ng
11711
11712   @staticmethod
11713   def _ComputeBasicNodeData(node_cfg):
11714     """Compute global node data.
11715
11716     @rtype: dict
11717     @returns: a dict of name: (node dict, node config)
11718
11719     """
11720     node_results = {}
11721     for ninfo in node_cfg.values():
11722       # fill in static (config-based) values
11723       pnr = {
11724         "tags": list(ninfo.GetTags()),
11725         "primary_ip": ninfo.primary_ip,
11726         "secondary_ip": ninfo.secondary_ip,
11727         "offline": ninfo.offline,
11728         "drained": ninfo.drained,
11729         "master_candidate": ninfo.master_candidate,
11730         "group": ninfo.group,
11731         "master_capable": ninfo.master_capable,
11732         "vm_capable": ninfo.vm_capable,
11733         }
11734
11735       node_results[ninfo.name] = pnr
11736
11737     return node_results
11738
11739   @staticmethod
11740   def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
11741                               node_results):
11742     """Compute global node data.
11743
11744     @param node_results: the basic node structures as filled from the config
11745
11746     """
11747     # make a copy of the current dict
11748     node_results = dict(node_results)
11749     for nname, nresult in node_data.items():
11750       assert nname in node_results, "Missing basic data for node %s" % nname
11751       ninfo = node_cfg[nname]
11752
11753       if not (ninfo.offline or ninfo.drained):
11754         nresult.Raise("Can't get data for node %s" % nname)
11755         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
11756                                 nname)
11757         remote_info = nresult.payload
11758
11759         for attr in ['memory_total', 'memory_free', 'memory_dom0',
11760                      'vg_size', 'vg_free', 'cpu_total']:
11761           if attr not in remote_info:
11762             raise errors.OpExecError("Node '%s' didn't return attribute"
11763                                      " '%s'" % (nname, attr))
11764           if not isinstance(remote_info[attr], int):
11765             raise errors.OpExecError("Node '%s' returned invalid value"
11766                                      " for '%s': %s" %
11767                                      (nname, attr, remote_info[attr]))
11768         # compute memory used by primary instances
11769         i_p_mem = i_p_up_mem = 0
11770         for iinfo, beinfo in i_list:
11771           if iinfo.primary_node == nname:
11772             i_p_mem += beinfo[constants.BE_MEMORY]
11773             if iinfo.name not in node_iinfo[nname].payload:
11774               i_used_mem = 0
11775             else:
11776               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
11777             i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
11778             remote_info['memory_free'] -= max(0, i_mem_diff)
11779
11780             if iinfo.admin_up:
11781               i_p_up_mem += beinfo[constants.BE_MEMORY]
11782
11783         # compute memory used by instances
11784         pnr_dyn = {
11785           "total_memory": remote_info['memory_total'],
11786           "reserved_memory": remote_info['memory_dom0'],
11787           "free_memory": remote_info['memory_free'],
11788           "total_disk": remote_info['vg_size'],
11789           "free_disk": remote_info['vg_free'],
11790           "total_cpus": remote_info['cpu_total'],
11791           "i_pri_memory": i_p_mem,
11792           "i_pri_up_memory": i_p_up_mem,
11793           }
11794         pnr_dyn.update(node_results[nname])
11795         node_results[nname] = pnr_dyn
11796
11797     return node_results
11798
11799   @staticmethod
11800   def _ComputeInstanceData(cluster_info, i_list):
11801     """Compute global instance data.
11802
11803     """
11804     instance_data = {}
11805     for iinfo, beinfo in i_list:
11806       nic_data = []
11807       for nic in iinfo.nics:
11808         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
11809         nic_dict = {"mac": nic.mac,
11810                     "ip": nic.ip,
11811                     "mode": filled_params[constants.NIC_MODE],
11812                     "link": filled_params[constants.NIC_LINK],
11813                    }
11814         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
11815           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
11816         nic_data.append(nic_dict)
11817       pir = {
11818         "tags": list(iinfo.GetTags()),
11819         "admin_up": iinfo.admin_up,
11820         "vcpus": beinfo[constants.BE_VCPUS],
11821         "memory": beinfo[constants.BE_MEMORY],
11822         "os": iinfo.os,
11823         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
11824         "nics": nic_data,
11825         "disks": [{constants.IDISK_SIZE: dsk.size,
11826                    constants.IDISK_MODE: dsk.mode}
11827                   for dsk in iinfo.disks],
11828         "disk_template": iinfo.disk_template,
11829         "hypervisor": iinfo.hypervisor,
11830         }
11831       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
11832                                                  pir["disks"])
11833       instance_data[iinfo.name] = pir
11834
11835     return instance_data
11836
11837   def _AddNewInstance(self):
11838     """Add new instance data to allocator structure.
11839
11840     This in combination with _AllocatorGetClusterData will create the
11841     correct structure needed as input for the allocator.
11842
11843     The checks for the completeness of the opcode must have already been
11844     done.
11845
11846     """
11847     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
11848
11849     if self.disk_template in constants.DTS_INT_MIRROR:
11850       self.required_nodes = 2
11851     else:
11852       self.required_nodes = 1
11853     request = {
11854       "name": self.name,
11855       "disk_template": self.disk_template,
11856       "tags": self.tags,
11857       "os": self.os,
11858       "vcpus": self.vcpus,
11859       "memory": self.mem_size,
11860       "disks": self.disks,
11861       "disk_space_total": disk_space,
11862       "nics": self.nics,
11863       "required_nodes": self.required_nodes,
11864       }
11865     return request
11866
11867   def _AddRelocateInstance(self):
11868     """Add relocate instance data to allocator structure.
11869
11870     This in combination with _IAllocatorGetClusterData will create the
11871     correct structure needed as input for the allocator.
11872
11873     The checks for the completeness of the opcode must have already been
11874     done.
11875
11876     """
11877     instance = self.cfg.GetInstanceInfo(self.name)
11878     if instance is None:
11879       raise errors.ProgrammerError("Unknown instance '%s' passed to"
11880                                    " IAllocator" % self.name)
11881
11882     if instance.disk_template not in constants.DTS_MIRRORED:
11883       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
11884                                  errors.ECODE_INVAL)
11885
11886     if instance.disk_template in constants.DTS_INT_MIRROR and \
11887         len(instance.secondary_nodes) != 1:
11888       raise errors.OpPrereqError("Instance has not exactly one secondary node",
11889                                  errors.ECODE_STATE)
11890
11891     self.required_nodes = 1
11892     disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
11893     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
11894
11895     request = {
11896       "name": self.name,
11897       "disk_space_total": disk_space,
11898       "required_nodes": self.required_nodes,
11899       "relocate_from": self.relocate_from,
11900       }
11901     return request
11902
11903   def _AddEvacuateNodes(self):
11904     """Add evacuate nodes data to allocator structure.
11905
11906     """
11907     request = {
11908       "evac_nodes": self.evac_nodes
11909       }
11910     return request
11911
11912   def _BuildInputData(self, fn):
11913     """Build input data structures.
11914
11915     """
11916     self._ComputeClusterData()
11917
11918     request = fn()
11919     request["type"] = self.mode
11920     self.in_data["request"] = request
11921
11922     self.in_text = serializer.Dump(self.in_data)
11923
11924   def Run(self, name, validate=True, call_fn=None):
11925     """Run an instance allocator and return the results.
11926
11927     """
11928     if call_fn is None:
11929       call_fn = self.rpc.call_iallocator_runner
11930
11931     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
11932     result.Raise("Failure while running the iallocator script")
11933
11934     self.out_text = result.payload
11935     if validate:
11936       self._ValidateResult()
11937
11938   def _ValidateResult(self):
11939     """Process the allocator results.
11940
11941     This will process and if successful save the result in
11942     self.out_data and the other parameters.
11943
11944     """
11945     try:
11946       rdict = serializer.Load(self.out_text)
11947     except Exception, err:
11948       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
11949
11950     if not isinstance(rdict, dict):
11951       raise errors.OpExecError("Can't parse iallocator results: not a dict")
11952
11953     # TODO: remove backwards compatiblity in later versions
11954     if "nodes" in rdict and "result" not in rdict:
11955       rdict["result"] = rdict["nodes"]
11956       del rdict["nodes"]
11957
11958     for key in "success", "info", "result":
11959       if key not in rdict:
11960         raise errors.OpExecError("Can't parse iallocator results:"
11961                                  " missing key '%s'" % key)
11962       setattr(self, key, rdict[key])
11963
11964     if not isinstance(rdict["result"], list):
11965       raise errors.OpExecError("Can't parse iallocator results: 'result' key"
11966                                " is not a list")
11967
11968     if self.mode == constants.IALLOCATOR_MODE_RELOC:
11969       assert self.relocate_from is not None
11970       assert self.required_nodes == 1
11971
11972       node2group = dict((name, ndata["group"])
11973                         for (name, ndata) in self.in_data["nodes"].items())
11974
11975       fn = compat.partial(self._NodesToGroups, node2group,
11976                           self.in_data["nodegroups"])
11977
11978       request_groups = fn(self.relocate_from)
11979       result_groups = fn(rdict["result"])
11980
11981       if result_groups != request_groups:
11982         raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
11983                                  " differ from original groups (%s)" %
11984                                  (utils.CommaJoin(result_groups),
11985                                   utils.CommaJoin(request_groups)))
11986
11987     self.out_data = rdict
11988
11989   @staticmethod
11990   def _NodesToGroups(node2group, groups, nodes):
11991     """Returns a list of unique group names for a list of nodes.
11992
11993     @type node2group: dict
11994     @param node2group: Map from node name to group UUID
11995     @type groups: dict
11996     @param groups: Group information
11997     @type nodes: list
11998     @param nodes: Node names
11999
12000     """
12001     result = set()
12002
12003     for node in nodes:
12004       try:
12005         group_uuid = node2group[node]
12006       except KeyError:
12007         # Ignore unknown node
12008         pass
12009       else:
12010         try:
12011           group = groups[group_uuid]
12012         except KeyError:
12013           # Can't find group, let's use UUID
12014           group_name = group_uuid
12015         else:
12016           group_name = group["name"]
12017
12018         result.add(group_name)
12019
12020     return sorted(result)
12021
12022
12023 class LUTestAllocator(NoHooksLU):
12024   """Run allocator tests.
12025
12026   This LU runs the allocator tests
12027
12028   """
12029   def CheckPrereq(self):
12030     """Check prerequisites.
12031
12032     This checks the opcode parameters depending on the director and mode test.
12033
12034     """
12035     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
12036       for attr in ["mem_size", "disks", "disk_template",
12037                    "os", "tags", "nics", "vcpus"]:
12038         if not hasattr(self.op, attr):
12039           raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
12040                                      attr, errors.ECODE_INVAL)
12041       iname = self.cfg.ExpandInstanceName(self.op.name)
12042       if iname is not None:
12043         raise errors.OpPrereqError("Instance '%s' already in the cluster" %
12044                                    iname, errors.ECODE_EXISTS)
12045       if not isinstance(self.op.nics, list):
12046         raise errors.OpPrereqError("Invalid parameter 'nics'",
12047                                    errors.ECODE_INVAL)
12048       if not isinstance(self.op.disks, list):
12049         raise errors.OpPrereqError("Invalid parameter 'disks'",
12050                                    errors.ECODE_INVAL)
12051       for row in self.op.disks:
12052         if (not isinstance(row, dict) or
12053             "size" not in row or
12054             not isinstance(row["size"], int) or
12055             "mode" not in row or
12056             row["mode"] not in ['r', 'w']):
12057           raise errors.OpPrereqError("Invalid contents of the 'disks'"
12058                                      " parameter", errors.ECODE_INVAL)
12059       if self.op.hypervisor is None:
12060         self.op.hypervisor = self.cfg.GetHypervisorType()
12061     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
12062       fname = _ExpandInstanceName(self.cfg, self.op.name)
12063       self.op.name = fname
12064       self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
12065     elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
12066       if not hasattr(self.op, "evac_nodes"):
12067         raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
12068                                    " opcode input", errors.ECODE_INVAL)
12069     else:
12070       raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
12071                                  self.op.mode, errors.ECODE_INVAL)
12072
12073     if self.op.direction == constants.IALLOCATOR_DIR_OUT:
12074       if self.op.allocator is None:
12075         raise errors.OpPrereqError("Missing allocator name",
12076                                    errors.ECODE_INVAL)
12077     elif self.op.direction != constants.IALLOCATOR_DIR_IN:
12078       raise errors.OpPrereqError("Wrong allocator test '%s'" %
12079                                  self.op.direction, errors.ECODE_INVAL)
12080
12081   def Exec(self, feedback_fn):
12082     """Run the allocator test.
12083
12084     """
12085     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
12086       ial = IAllocator(self.cfg, self.rpc,
12087                        mode=self.op.mode,
12088                        name=self.op.name,
12089                        mem_size=self.op.mem_size,
12090                        disks=self.op.disks,
12091                        disk_template=self.op.disk_template,
12092                        os=self.op.os,
12093                        tags=self.op.tags,
12094                        nics=self.op.nics,
12095                        vcpus=self.op.vcpus,
12096                        hypervisor=self.op.hypervisor,
12097                        )
12098     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
12099       ial = IAllocator(self.cfg, self.rpc,
12100                        mode=self.op.mode,
12101                        name=self.op.name,
12102                        relocate_from=list(self.relocate_from),
12103                        )
12104     elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
12105       ial = IAllocator(self.cfg, self.rpc,
12106                        mode=self.op.mode,
12107                        evac_nodes=self.op.evac_nodes)
12108     else:
12109       raise errors.ProgrammerError("Uncatched mode %s in"
12110                                    " LUTestAllocator.Exec", self.op.mode)
12111
12112     if self.op.direction == constants.IALLOCATOR_DIR_IN:
12113       result = ial.in_text
12114     else:
12115       ial.Run(self.op.allocator, validate=False)
12116       result = ial.out_text
12117     return result
12118
12119
12120 #: Query type implementations
12121 _QUERY_IMPL = {
12122   constants.QR_INSTANCE: _InstanceQuery,
12123   constants.QR_NODE: _NodeQuery,
12124   constants.QR_GROUP: _GroupQuery,
12125   constants.QR_OS: _OsQuery,
12126   }
12127
12128 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
12129
12130
12131 def _GetQueryImplementation(name):
12132   """Returns the implemtnation for a query type.
12133
12134   @param name: Query type, must be one of L{constants.QR_VIA_OP}
12135
12136   """
12137   try:
12138     return _QUERY_IMPL[name]
12139   except KeyError:
12140     raise errors.OpPrereqError("Unknown query resource '%s'" % name,
12141                                errors.ECODE_INVAL)