code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable-msg=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay too many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import platform
  36 import logging
  37 import copy
  38 import OpenSSL
  39 import socket
  40 import tempfile
  41 import shutil
  42 import itertools
  43
  44 from ganeti import ssh
  45 from ganeti import utils
  46 from ganeti import errors
  47 from ganeti import hypervisor
  48 from ganeti import locking
  49 from ganeti import constants
  50 from ganeti import objects
  51 from ganeti import serializer
  52 from ganeti import ssconf
  53 from ganeti import uidpool
  54 from ganeti import compat
  55 from ganeti import masterd
  56 from ganeti import netutils
  57 from ganeti import query
  58 from ganeti import qlang
  59 from ganeti import opcodes
  60
  61 import ganeti.masterd.instance # pylint: disable-msg=W0611
  62
  63
  64 def _SupportsOob(cfg, node):
  65   """Tells if node supports OOB.
  66
  67   @type cfg: L{config.ConfigWriter}
  68   @param cfg: The cluster configuration
  69   @type node: L{objects.Node}
  70   @param node: The node
  71   @return: The OOB script if supported or an empty string otherwise
  72
  73   """
  74   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
  75
  76
  77 class ResultWithJobs:
  78   """Data container for LU results with jobs.
  79
  80   Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  81   by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  82   contained in the C{jobs} attribute and include the job IDs in the opcode
  83   result.
  84
  85   """
  86   def __init__(self, jobs, **kwargs):
  87     """Initializes this class.
  88
  89     Additional return values can be specified as keyword arguments.
  90
  91     @type jobs: list of lists of L{opcode.OpCode}
  92     @param jobs: A list of lists of opcode objects
  93
  94     """
  95     self.jobs = jobs
  96     self.other = kwargs
  97
  98
  99 class LogicalUnit(object):
 100   """Logical Unit base class.
 101
 102   Subclasses must follow these rules:
 103     - implement ExpandNames
 104     - implement CheckPrereq (except when tasklets are used)
 105     - implement Exec (except when tasklets are used)
 106     - implement BuildHooksEnv
 107     - implement BuildHooksNodes
 108     - redefine HPATH and HTYPE
 109     - optionally redefine their run requirements:
 110         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
 111
 112   Note that all commands require root permissions.
 113
 114   @ivar dry_run_result: the value (if any) that will be returned to the caller
 115       in dry-run mode (signalled by opcode dry_run parameter)
 116
 117   """
 118   HPATH = None
 119   HTYPE = None
 120   REQ_BGL = True
 121
 122   def __init__(self, processor, op, context, rpc):
 123     """Constructor for LogicalUnit.
 124
 125     This needs to be overridden in derived classes in order to check op
 126     validity.
 127
 128     """
 129     self.proc = processor
 130     self.op = op
 131     self.cfg = context.cfg
 132     self.context = context
 133     self.rpc = rpc
 134     # Dicts used to declare locking needs to mcpu
 135     self.needed_locks = None
 136     self.acquired_locks = {}
 137     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 138     self.add_locks = {}
 139     self.remove_locks = {}
 140     # Used to force good behavior when calling helper functions
 141     self.recalculate_locks = {}
 142     # logging
 143     self.Log = processor.Log # pylint: disable-msg=C0103
 144     self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
 145     self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
 146     self.LogStep = processor.LogStep # pylint: disable-msg=C0103
 147     # support for dry-run
 148     self.dry_run_result = None
 149     # support for generic debug attribute
 150     if (not hasattr(self.op, "debug_level") or
 151         not isinstance(self.op.debug_level, int)):
 152       self.op.debug_level = 0
 153
 154     # Tasklets
 155     self.tasklets = None
 156
 157     # Validate opcode parameters and set defaults
 158     self.op.Validate(True)
 159
 160     self.CheckArguments()
 161
 162   def CheckArguments(self):
 163     """Check syntactic validity for the opcode arguments.
 164
 165     This method is for doing a simple syntactic check and ensure
 166     validity of opcode parameters, without any cluster-related
 167     checks. While the same can be accomplished in ExpandNames and/or
 168     CheckPrereq, doing these separate is better because:
 169
 170       - ExpandNames is left as as purely a lock-related function
 171       - CheckPrereq is run after we have acquired locks (and possible
 172         waited for them)
 173
 174     The function is allowed to change the self.op attribute so that
 175     later methods can no longer worry about missing parameters.
 176
 177     """
 178     pass
 179
 180   def ExpandNames(self):
 181     """Expand names for this LU.
 182
 183     This method is called before starting to execute the opcode, and it should
 184     update all the parameters of the opcode to their canonical form (e.g. a
 185     short node name must be fully expanded after this method has successfully
 186     completed). This way locking, hooks, logging, etc. can work correctly.
 187
 188     LUs which implement this method must also populate the self.needed_locks
 189     member, as a dict with lock levels as keys, and a list of needed lock names
 190     as values. Rules:
 191
 192       - use an empty dict if you don't need any lock
 193       - if you don't need any lock at a particular level omit that level
 194       - don't put anything for the BGL level
 195       - if you want all locks at a level use locking.ALL_SET as a value
 196
 197     If you need to share locks (rather than acquire them exclusively) at one
 198     level you can modify self.share_locks, setting a true value (usually 1) for
 199     that level. By default locks are not shared.
 200
 201     This function can also define a list of tasklets, which then will be
 202     executed in order instead of the usual LU-level CheckPrereq and Exec
 203     functions, if those are not defined by the LU.
 204
 205     Examples::
 206
 207       # Acquire all nodes and one instance
 208       self.needed_locks = {
 209         locking.LEVEL_NODE: locking.ALL_SET,
 210         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 211       }
 212       # Acquire just two nodes
 213       self.needed_locks = {
 214         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 215       }
 216       # Acquire no locks
 217       self.needed_locks = {} # No, you can't leave it to the default value None
 218
 219     """
 220     # The implementation of this method is mandatory only if the new LU is
 221     # concurrent, so that old LUs don't need to be changed all at the same
 222     # time.
 223     if self.REQ_BGL:
 224       self.needed_locks = {} # Exclusive LUs don't need locks.
 225     else:
 226       raise NotImplementedError
 227
 228   def DeclareLocks(self, level):
 229     """Declare LU locking needs for a level
 230
 231     While most LUs can just declare their locking needs at ExpandNames time,
 232     sometimes there's the need to calculate some locks after having acquired
 233     the ones before. This function is called just before acquiring locks at a
 234     particular level, but after acquiring the ones at lower levels, and permits
 235     such calculations. It can be used to modify self.needed_locks, and by
 236     default it does nothing.
 237
 238     This function is only called if you have something already set in
 239     self.needed_locks for the level.
 240
 241     @param level: Locking level which is going to be locked
 242     @type level: member of ganeti.locking.LEVELS
 243
 244     """
 245
 246   def CheckPrereq(self):
 247     """Check prerequisites for this LU.
 248
 249     This method should check that the prerequisites for the execution
 250     of this LU are fulfilled. It can do internode communication, but
 251     it should be idempotent - no cluster or system changes are
 252     allowed.
 253
 254     The method should raise errors.OpPrereqError in case something is
 255     not fulfilled. Its return value is ignored.
 256
 257     This method should also update all the parameters of the opcode to
 258     their canonical form if it hasn't been done by ExpandNames before.
 259
 260     """
 261     if self.tasklets is not None:
 262       for (idx, tl) in enumerate(self.tasklets):
 263         logging.debug("Checking prerequisites for tasklet %s/%s",
 264                       idx + 1, len(self.tasklets))
 265         tl.CheckPrereq()
 266     else:
 267       pass
 268
 269   def Exec(self, feedback_fn):
 270     """Execute the LU.
 271
 272     This method should implement the actual work. It should raise
 273     errors.OpExecError for failures that are somewhat dealt with in
 274     code, or expected.
 275
 276     """
 277     if self.tasklets is not None:
 278       for (idx, tl) in enumerate(self.tasklets):
 279         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 280         tl.Exec(feedback_fn)
 281     else:
 282       raise NotImplementedError
 283
 284   def BuildHooksEnv(self):
 285     """Build hooks environment for this LU.
 286
 287     @rtype: dict
 288     @return: Dictionary containing the environment that will be used for
 289       running the hooks for this LU. The keys of the dict must not be prefixed
 290       with "GANETI_"--that'll be added by the hooks runner. The hooks runner
 291       will extend the environment with additional variables. If no environment
 292       should be defined, an empty dictionary should be returned (not C{None}).
 293     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 294       will not be called.
 295
 296     """
 297     raise NotImplementedError
 298
 299   def BuildHooksNodes(self):
 300     """Build list of nodes to run LU's hooks.
 301
 302     @rtype: tuple; (list, list)
 303     @return: Tuple containing a list of node names on which the hook
 304       should run before the execution and a list of node names on which the
 305       hook should run after the execution. No nodes should be returned as an
 306       empty list (and not None).
 307     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 308       will not be called.
 309
 310     """
 311     raise NotImplementedError
 312
 313   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 314     """Notify the LU about the results of its hooks.
 315
 316     This method is called every time a hooks phase is executed, and notifies
 317     the Logical Unit about the hooks' result. The LU can then use it to alter
 318     its result based on the hooks.  By default the method does nothing and the
 319     previous result is passed back unchanged but any LU can define it if it
 320     wants to use the local cluster hook-scripts somehow.
 321
 322     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 323         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 324     @param hook_results: the results of the multi-node hooks rpc call
 325     @param feedback_fn: function used send feedback back to the caller
 326     @param lu_result: the previous Exec result this LU had, or None
 327         in the PRE phase
 328     @return: the new Exec result, based on the previous result
 329         and hook results
 330
 331     """
 332     # API must be kept, thus we ignore the unused argument and could
 333     # be a function warnings
 334     # pylint: disable-msg=W0613,R0201
 335     return lu_result
 336
 337   def _ExpandAndLockInstance(self):
 338     """Helper function to expand and lock an instance.
 339
 340     Many LUs that work on an instance take its name in self.op.instance_name
 341     and need to expand it and then declare the expanded name for locking. This
 342     function does it, and then updates self.op.instance_name to the expanded
 343     name. It also initializes needed_locks as a dict, if this hasn't been done
 344     before.
 345
 346     """
 347     if self.needed_locks is None:
 348       self.needed_locks = {}
 349     else:
 350       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 351         "_ExpandAndLockInstance called with instance-level locks set"
 352     self.op.instance_name = _ExpandInstanceName(self.cfg,
 353                                                 self.op.instance_name)
 354     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 355
 356   def _LockInstancesNodes(self, primary_only=False):
 357     """Helper function to declare instances' nodes for locking.
 358
 359     This function should be called after locking one or more instances to lock
 360     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 361     with all primary or secondary nodes for instances already locked and
 362     present in self.needed_locks[locking.LEVEL_INSTANCE].
 363
 364     It should be called from DeclareLocks, and for safety only works if
 365     self.recalculate_locks[locking.LEVEL_NODE] is set.
 366
 367     In the future it may grow parameters to just lock some instance's nodes, or
 368     to just lock primaries or secondary nodes, if needed.
 369
 370     If should be called in DeclareLocks in a way similar to::
 371
 372       if level == locking.LEVEL_NODE:
 373         self._LockInstancesNodes()
 374
 375     @type primary_only: boolean
 376     @param primary_only: only lock primary nodes of locked instances
 377
 378     """
 379     assert locking.LEVEL_NODE in self.recalculate_locks, \
 380       "_LockInstancesNodes helper function called with no nodes to recalculate"
 381
 382     # TODO: check if we're really been called with the instance locks held
 383
 384     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 385     # future we might want to have different behaviors depending on the value
 386     # of self.recalculate_locks[locking.LEVEL_NODE]
 387     wanted_nodes = []
 388     for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
 389       instance = self.context.cfg.GetInstanceInfo(instance_name)
 390       wanted_nodes.append(instance.primary_node)
 391       if not primary_only:
 392         wanted_nodes.extend(instance.secondary_nodes)
 393
 394     if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
 395       self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
 396     elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
 397       self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
 398
 399     del self.recalculate_locks[locking.LEVEL_NODE]
 400
 401
 402 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
 403   """Simple LU which runs no hooks.
 404
 405   This LU is intended as a parent for other LogicalUnits which will
 406   run no hooks, in order to reduce duplicate code.
 407
 408   """
 409   HPATH = None
 410   HTYPE = None
 411
 412   def BuildHooksEnv(self):
 413     """Empty BuildHooksEnv for NoHooksLu.
 414
 415     This just raises an error.
 416
 417     """
 418     raise AssertionError("BuildHooksEnv called for NoHooksLUs")
 419
 420   def BuildHooksNodes(self):
 421     """Empty BuildHooksNodes for NoHooksLU.
 422
 423     """
 424     raise AssertionError("BuildHooksNodes called for NoHooksLU")
 425
 426
 427 class Tasklet:
 428   """Tasklet base class.
 429
 430   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 431   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 432   tasklets know nothing about locks.
 433
 434   Subclasses must follow these rules:
 435     - Implement CheckPrereq
 436     - Implement Exec
 437
 438   """
 439   def __init__(self, lu):
 440     self.lu = lu
 441
 442     # Shortcuts
 443     self.cfg = lu.cfg
 444     self.rpc = lu.rpc
 445
 446   def CheckPrereq(self):
 447     """Check prerequisites for this tasklets.
 448
 449     This method should check whether the prerequisites for the execution of
 450     this tasklet are fulfilled. It can do internode communication, but it
 451     should be idempotent - no cluster or system changes are allowed.
 452
 453     The method should raise errors.OpPrereqError in case something is not
 454     fulfilled. Its return value is ignored.
 455
 456     This method should also update all parameters to their canonical form if it
 457     hasn't been done before.
 458
 459     """
 460     pass
 461
 462   def Exec(self, feedback_fn):
 463     """Execute the tasklet.
 464
 465     This method should implement the actual work. It should raise
 466     errors.OpExecError for failures that are somewhat dealt with in code, or
 467     expected.
 468
 469     """
 470     raise NotImplementedError
 471
 472
 473 class _QueryBase:
 474   """Base for query utility classes.
 475
 476   """
 477   #: Attribute holding field definitions
 478   FIELDS = None
 479
 480   def __init__(self, filter_, fields, use_locking):
 481     """Initializes this class.
 482
 483     """
 484     self.use_locking = use_locking
 485
 486     self.query = query.Query(self.FIELDS, fields, filter_=filter_,
 487                              namefield="name")
 488     self.requested_data = self.query.RequestedData()
 489     self.names = self.query.RequestedNames()
 490
 491     # Sort only if no names were requested
 492     self.sort_by_name = not self.names
 493
 494     self.do_locking = None
 495     self.wanted = None
 496
 497   def _GetNames(self, lu, all_names, lock_level):
 498     """Helper function to determine names asked for in the query.
 499
 500     """
 501     if self.do_locking:
 502       names = lu.acquired_locks[lock_level]
 503     else:
 504       names = all_names
 505
 506     if self.wanted == locking.ALL_SET:
 507       assert not self.names
 508       # caller didn't specify names, so ordering is not important
 509       return utils.NiceSort(names)
 510
 511     # caller specified names and we must keep the same order
 512     assert self.names
 513     assert not self.do_locking or lu.acquired_locks[lock_level]
 514
 515     missing = set(self.wanted).difference(names)
 516     if missing:
 517       raise errors.OpExecError("Some items were removed before retrieving"
 518                                " their data: %s" % missing)
 519
 520     # Return expanded names
 521     return self.wanted
 522
 523   def ExpandNames(self, lu):
 524     """Expand names for this query.
 525
 526     See L{LogicalUnit.ExpandNames}.
 527
 528     """
 529     raise NotImplementedError()
 530
 531   def DeclareLocks(self, lu, level):
 532     """Declare locks for this query.
 533
 534     See L{LogicalUnit.DeclareLocks}.
 535
 536     """
 537     raise NotImplementedError()
 538
 539   def _GetQueryData(self, lu):
 540     """Collects all data for this query.
 541
 542     @return: Query data object
 543
 544     """
 545     raise NotImplementedError()
 546
 547   def NewStyleQuery(self, lu):
 548     """Collect data and execute query.
 549
 550     """
 551     return query.GetQueryResponse(self.query, self._GetQueryData(lu),
 552                                   sort_by_name=self.sort_by_name)
 553
 554   def OldStyleQuery(self, lu):
 555     """Collect data and execute query.
 556
 557     """
 558     return self.query.OldStyleQuery(self._GetQueryData(lu),
 559                                     sort_by_name=self.sort_by_name)
 560
 561
 562 def _GetWantedNodes(lu, nodes):
 563   """Returns list of checked and expanded node names.
 564
 565   @type lu: L{LogicalUnit}
 566   @param lu: the logical unit on whose behalf we execute
 567   @type nodes: list
 568   @param nodes: list of node names or None for all nodes
 569   @rtype: list
 570   @return: the list of nodes, sorted
 571   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 572
 573   """
 574   if nodes:
 575     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 576
 577   return utils.NiceSort(lu.cfg.GetNodeList())
 578
 579
 580 def _GetWantedInstances(lu, instances):
 581   """Returns list of checked and expanded instance names.
 582
 583   @type lu: L{LogicalUnit}
 584   @param lu: the logical unit on whose behalf we execute
 585   @type instances: list
 586   @param instances: list of instance names or None for all instances
 587   @rtype: list
 588   @return: the list of instances, sorted
 589   @raise errors.OpPrereqError: if the instances parameter is wrong type
 590   @raise errors.OpPrereqError: if any of the passed instances is not found
 591
 592   """
 593   if instances:
 594     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 595   else:
 596     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 597   return wanted
 598
 599
 600 def _GetUpdatedParams(old_params, update_dict,
 601                       use_default=True, use_none=False):
 602   """Return the new version of a parameter dictionary.
 603
 604   @type old_params: dict
 605   @param old_params: old parameters
 606   @type update_dict: dict
 607   @param update_dict: dict containing new parameter values, or
 608       constants.VALUE_DEFAULT to reset the parameter to its default
 609       value
 610   @param use_default: boolean
 611   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 612       values as 'to be deleted' values
 613   @param use_none: boolean
 614   @type use_none: whether to recognise C{None} values as 'to be
 615       deleted' values
 616   @rtype: dict
 617   @return: the new parameter dictionary
 618
 619   """
 620   params_copy = copy.deepcopy(old_params)
 621   for key, val in update_dict.iteritems():
 622     if ((use_default and val == constants.VALUE_DEFAULT) or
 623         (use_none and val is None)):
 624       try:
 625         del params_copy[key]
 626       except KeyError:
 627         pass
 628     else:
 629       params_copy[key] = val
 630   return params_copy
 631
 632
 633 def _ReleaseLocks(lu, level, names=None, keep=None):
 634   """Releases locks owned by an LU.
 635
 636   @type lu: L{LogicalUnit}
 637   @param level: Lock level
 638   @type names: list or None
 639   @param names: Names of locks to release
 640   @type keep: list or None
 641   @param keep: Names of locks to retain
 642
 643   """
 644   assert not (keep is not None and names is not None), \
 645          "Only one of the 'names' and the 'keep' parameters can be given"
 646
 647   if names is not None:
 648     should_release = names.__contains__
 649   elif keep:
 650     should_release = lambda name: name not in keep
 651   else:
 652     should_release = None
 653
 654   if should_release:
 655     retain = []
 656     release = []
 657
 658     # Determine which locks to release
 659     for name in lu.acquired_locks[level]:
 660       if should_release(name):
 661         release.append(name)
 662       else:
 663         retain.append(name)
 664
 665     assert len(lu.acquired_locks[level]) == (len(retain) + len(release))
 666
 667     # Release just some locks
 668     lu.context.glm.release(level, names=release)
 669     lu.acquired_locks[level] = retain
 670
 671     assert frozenset(lu.context.glm.list_owned(level)) == frozenset(retain)
 672   else:
 673     # Release everything
 674     lu.context.glm.release(level)
 675     del lu.acquired_locks[level]
 676
 677     assert not lu.context.glm.list_owned(level), "No locks should be owned"
 678
 679
 680 def _RunPostHook(lu, node_name):
 681   """Runs the post-hook for an opcode on a single node.
 682
 683   """
 684   hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
 685   try:
 686     hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
 687   except:
 688     # pylint: disable-msg=W0702
 689     lu.LogWarning("Errors occurred running hooks on %s" % node_name)
 690
 691
 692 def _CheckOutputFields(static, dynamic, selected):
 693   """Checks whether all selected fields are valid.
 694
 695   @type static: L{utils.FieldSet}
 696   @param static: static fields set
 697   @type dynamic: L{utils.FieldSet}
 698   @param dynamic: dynamic fields set
 699
 700   """
 701   f = utils.FieldSet()
 702   f.Extend(static)
 703   f.Extend(dynamic)
 704
 705   delta = f.NonMatching(selected)
 706   if delta:
 707     raise errors.OpPrereqError("Unknown output fields selected: %s"
 708                                % ",".join(delta), errors.ECODE_INVAL)
 709
 710
 711 def _CheckGlobalHvParams(params):
 712   """Validates that given hypervisor params are not global ones.
 713
 714   This will ensure that instances don't get customised versions of
 715   global params.
 716
 717   """
 718   used_globals = constants.HVC_GLOBALS.intersection(params)
 719   if used_globals:
 720     msg = ("The following hypervisor parameters are global and cannot"
 721            " be customized at instance level, please modify them at"
 722            " cluster level: %s" % utils.CommaJoin(used_globals))
 723     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 724
 725
 726 def _CheckNodeOnline(lu, node, msg=None):
 727   """Ensure that a given node is online.
 728
 729   @param lu: the LU on behalf of which we make the check
 730   @param node: the node to check
 731   @param msg: if passed, should be a message to replace the default one
 732   @raise errors.OpPrereqError: if the node is offline
 733
 734   """
 735   if msg is None:
 736     msg = "Can't use offline node"
 737   if lu.cfg.GetNodeInfo(node).offline:
 738     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
 739
 740
 741 def _CheckNodeNotDrained(lu, node):
 742   """Ensure that a given node is not drained.
 743
 744   @param lu: the LU on behalf of which we make the check
 745   @param node: the node to check
 746   @raise errors.OpPrereqError: if the node is drained
 747
 748   """
 749   if lu.cfg.GetNodeInfo(node).drained:
 750     raise errors.OpPrereqError("Can't use drained node %s" % node,
 751                                errors.ECODE_STATE)
 752
 753
 754 def _CheckNodeVmCapable(lu, node):
 755   """Ensure that a given node is vm capable.
 756
 757   @param lu: the LU on behalf of which we make the check
 758   @param node: the node to check
 759   @raise errors.OpPrereqError: if the node is not vm capable
 760
 761   """
 762   if not lu.cfg.GetNodeInfo(node).vm_capable:
 763     raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
 764                                errors.ECODE_STATE)
 765
 766
 767 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 768   """Ensure that a node supports a given OS.
 769
 770   @param lu: the LU on behalf of which we make the check
 771   @param node: the node to check
 772   @param os_name: the OS to query about
 773   @param force_variant: whether to ignore variant errors
 774   @raise errors.OpPrereqError: if the node is not supporting the OS
 775
 776   """
 777   result = lu.rpc.call_os_get(node, os_name)
 778   result.Raise("OS '%s' not in supported OS list for node %s" %
 779                (os_name, node),
 780                prereq=True, ecode=errors.ECODE_INVAL)
 781   if not force_variant:
 782     _CheckOSVariant(result.payload, os_name)
 783
 784
 785 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
 786   """Ensure that a node has the given secondary ip.
 787
 788   @type lu: L{LogicalUnit}
 789   @param lu: the LU on behalf of which we make the check
 790   @type node: string
 791   @param node: the node to check
 792   @type secondary_ip: string
 793   @param secondary_ip: the ip to check
 794   @type prereq: boolean
 795   @param prereq: whether to throw a prerequisite or an execute error
 796   @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
 797   @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
 798
 799   """
 800   result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
 801   result.Raise("Failure checking secondary ip on node %s" % node,
 802                prereq=prereq, ecode=errors.ECODE_ENVIRON)
 803   if not result.payload:
 804     msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
 805            " please fix and re-run this command" % secondary_ip)
 806     if prereq:
 807       raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
 808     else:
 809       raise errors.OpExecError(msg)
 810
 811
 812 def _GetClusterDomainSecret():
 813   """Reads the cluster domain secret.
 814
 815   """
 816   return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
 817                                strict=True)
 818
 819
 820 def _CheckInstanceDown(lu, instance, reason):
 821   """Ensure that an instance is not running."""
 822   if instance.admin_up:
 823     raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
 824                                (instance.name, reason), errors.ECODE_STATE)
 825
 826   pnode = instance.primary_node
 827   ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
 828   ins_l.Raise("Can't contact node %s for instance information" % pnode,
 829               prereq=True, ecode=errors.ECODE_ENVIRON)
 830
 831   if instance.name in ins_l.payload:
 832     raise errors.OpPrereqError("Instance %s is running, %s" %
 833                                (instance.name, reason), errors.ECODE_STATE)
 834
 835
 836 def _ExpandItemName(fn, name, kind):
 837   """Expand an item name.
 838
 839   @param fn: the function to use for expansion
 840   @param name: requested item name
 841   @param kind: text description ('Node' or 'Instance')
 842   @return: the resolved (full) name
 843   @raise errors.OpPrereqError: if the item is not found
 844
 845   """
 846   full_name = fn(name)
 847   if full_name is None:
 848     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
 849                                errors.ECODE_NOENT)
 850   return full_name
 851
 852
 853 def _ExpandNodeName(cfg, name):
 854   """Wrapper over L{_ExpandItemName} for nodes."""
 855   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
 856
 857
 858 def _ExpandInstanceName(cfg, name):
 859   """Wrapper over L{_ExpandItemName} for instance."""
 860   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
 861
 862
 863 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 864                           memory, vcpus, nics, disk_template, disks,
 865                           bep, hvp, hypervisor_name):
 866   """Builds instance related env variables for hooks
 867
 868   This builds the hook environment from individual variables.
 869
 870   @type name: string
 871   @param name: the name of the instance
 872   @type primary_node: string
 873   @param primary_node: the name of the instance's primary node
 874   @type secondary_nodes: list
 875   @param secondary_nodes: list of secondary nodes as strings
 876   @type os_type: string
 877   @param os_type: the name of the instance's OS
 878   @type status: boolean
 879   @param status: the should_run status of the instance
 880   @type memory: string
 881   @param memory: the memory size of the instance
 882   @type vcpus: string
 883   @param vcpus: the count of VCPUs the instance has
 884   @type nics: list
 885   @param nics: list of tuples (ip, mac, mode, link) representing
 886       the NICs the instance has
 887   @type disk_template: string
 888   @param disk_template: the disk template of the instance
 889   @type disks: list
 890   @param disks: the list of (size, mode) pairs
 891   @type bep: dict
 892   @param bep: the backend parameters for the instance
 893   @type hvp: dict
 894   @param hvp: the hypervisor parameters for the instance
 895   @type hypervisor_name: string
 896   @param hypervisor_name: the hypervisor for the instance
 897   @rtype: dict
 898   @return: the hook environment for this instance
 899
 900   """
 901   if status:
 902     str_status = "up"
 903   else:
 904     str_status = "down"
 905   env = {
 906     "OP_TARGET": name,
 907     "INSTANCE_NAME": name,
 908     "INSTANCE_PRIMARY": primary_node,
 909     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
 910     "INSTANCE_OS_TYPE": os_type,
 911     "INSTANCE_STATUS": str_status,
 912     "INSTANCE_MEMORY": memory,
 913     "INSTANCE_VCPUS": vcpus,
 914     "INSTANCE_DISK_TEMPLATE": disk_template,
 915     "INSTANCE_HYPERVISOR": hypervisor_name,
 916   }
 917
 918   if nics:
 919     nic_count = len(nics)
 920     for idx, (ip, mac, mode, link) in enumerate(nics):
 921       if ip is None:
 922         ip = ""
 923       env["INSTANCE_NIC%d_IP" % idx] = ip
 924       env["INSTANCE_NIC%d_MAC" % idx] = mac
 925       env["INSTANCE_NIC%d_MODE" % idx] = mode
 926       env["INSTANCE_NIC%d_LINK" % idx] = link
 927       if mode == constants.NIC_MODE_BRIDGED:
 928         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
 929   else:
 930     nic_count = 0
 931
 932   env["INSTANCE_NIC_COUNT"] = nic_count
 933
 934   if disks:
 935     disk_count = len(disks)
 936     for idx, (size, mode) in enumerate(disks):
 937       env["INSTANCE_DISK%d_SIZE" % idx] = size
 938       env["INSTANCE_DISK%d_MODE" % idx] = mode
 939   else:
 940     disk_count = 0
 941
 942   env["INSTANCE_DISK_COUNT"] = disk_count
 943
 944   for source, kind in [(bep, "BE"), (hvp, "HV")]:
 945     for key, value in source.items():
 946       env["INSTANCE_%s_%s" % (kind, key)] = value
 947
 948   return env
 949
 950
 951 def _NICListToTuple(lu, nics):
 952   """Build a list of nic information tuples.
 953
 954   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
 955   value in LUInstanceQueryData.
 956
 957   @type lu:  L{LogicalUnit}
 958   @param lu: the logical unit on whose behalf we execute
 959   @type nics: list of L{objects.NIC}
 960   @param nics: list of nics to convert to hooks tuples
 961
 962   """
 963   hooks_nics = []
 964   cluster = lu.cfg.GetClusterInfo()
 965   for nic in nics:
 966     ip = nic.ip
 967     mac = nic.mac
 968     filled_params = cluster.SimpleFillNIC(nic.nicparams)
 969     mode = filled_params[constants.NIC_MODE]
 970     link = filled_params[constants.NIC_LINK]
 971     hooks_nics.append((ip, mac, mode, link))
 972   return hooks_nics
 973
 974
 975 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
 976   """Builds instance related env variables for hooks from an object.
 977
 978   @type lu: L{LogicalUnit}
 979   @param lu: the logical unit on whose behalf we execute
 980   @type instance: L{objects.Instance}
 981   @param instance: the instance for which we should build the
 982       environment
 983   @type override: dict
 984   @param override: dictionary with key/values that will override
 985       our values
 986   @rtype: dict
 987   @return: the hook environment dictionary
 988
 989   """
 990   cluster = lu.cfg.GetClusterInfo()
 991   bep = cluster.FillBE(instance)
 992   hvp = cluster.FillHV(instance)
 993   args = {
 994     'name': instance.name,
 995     'primary_node': instance.primary_node,
 996     'secondary_nodes': instance.secondary_nodes,
 997     'os_type': instance.os,
 998     'status': instance.admin_up,
 999     'memory': bep[constants.BE_MEMORY],
1000     'vcpus': bep[constants.BE_VCPUS],
1001     'nics': _NICListToTuple(lu, instance.nics),
1002     'disk_template': instance.disk_template,
1003     'disks': [(disk.size, disk.mode) for disk in instance.disks],
1004     'bep': bep,
1005     'hvp': hvp,
1006     'hypervisor_name': instance.hypervisor,
1007   }
1008   if override:
1009     args.update(override)
1010   return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1011
1012
1013 def _AdjustCandidatePool(lu, exceptions):
1014   """Adjust the candidate pool after node operations.
1015
1016   """
1017   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1018   if mod_list:
1019     lu.LogInfo("Promoted nodes to master candidate role: %s",
1020                utils.CommaJoin(node.name for node in mod_list))
1021     for name in mod_list:
1022       lu.context.ReaddNode(name)
1023   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1024   if mc_now > mc_max:
1025     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1026                (mc_now, mc_max))
1027
1028
1029 def _DecideSelfPromotion(lu, exceptions=None):
1030   """Decide whether I should promote myself as a master candidate.
1031
1032   """
1033   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1034   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1035   # the new node will increase mc_max with one, so:
1036   mc_should = min(mc_should + 1, cp_size)
1037   return mc_now < mc_should
1038
1039
def _CheckNicsBridgesExist(lu, target_nics, target_node):
  """Check that the bridges needed by a list of nics exist.

  Only NICs whose filled parameters have bridged mode are considered;
  if there are none, no RPC call is made.

  @param lu: the logical unit on whose behalf we execute
  @param target_nics: the NIC objects to check
  @param target_node: the node on which the bridges are checked

  """
  cluster = lu.cfg.GetClusterInfo()

  bridges = []
  for nic in target_nics:
    params = cluster.SimpleFillNIC(nic.nicparams)
    if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
      bridges.append(params[constants.NIC_LINK])

  if not bridges:
    return

  result = lu.rpc.call_bridges_exist(target_node, bridges)
  result.Raise("Error checking bridges on destination node '%s'" %
               target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1052
1053
def _CheckInstanceBridgesExist(lu, instance, node=None):
  """Check that the bridges needed by an instance exist.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose NICs are checked
  @param node: the node to check on; when C{None}, the instance's
      primary node is used

  """
  target = node
  if target is None:
    target = instance.primary_node
  _CheckNicsBridgesExist(lu, instance.nics, target)
1061
1062
1063 def _CheckOSVariant(os_obj, name):
1064   """Check whether an OS name conforms to the os variants specification.
1065
1066   @type os_obj: L{objects.OS}
1067   @param os_obj: OS object to check
1068   @type name: string
1069   @param name: OS name passed by the user, to check for validity
1070
1071   """
1072   if not os_obj.supported_variants:
1073     return
1074   variant = objects.OS.GetVariant(name)
1075   if not variant:
1076     raise errors.OpPrereqError("OS name must include a variant",
1077                                errors.ECODE_INVAL)
1078
1079   if variant not in os_obj.supported_variants:
1080     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1081
1082
1083 def _GetNodeInstancesInner(cfg, fn):
1084   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1085
1086
def _GetNodeInstances(cfg, node_name):
  """Returns a list of all primary and secondary instances on a node.

  @param cfg: the configuration object to read the instances from
  @param node_name: the node to look for in each instance's
      C{all_nodes}

  """
  def _OnNode(inst):
    return node_name in inst.all_nodes

  return _GetNodeInstancesInner(cfg, _OnNode)
1093
1094
def _GetNodePrimaryInstances(cfg, node_name):
  """Returns primary instances on a node.

  @param cfg: the configuration object to read the instances from
  @param node_name: the node to compare with each instance's
      C{primary_node}

  """
  def _IsPrimary(inst):
    return node_name == inst.primary_node

  return _GetNodeInstancesInner(cfg, _IsPrimary)
1101
1102
def _GetNodeSecondaryInstances(cfg, node_name):
  """Returns secondary instances on a node.

  @param cfg: the configuration object to read the instances from
  @param node_name: the node to look for in each instance's
      C{secondary_nodes}

  """
  def _IsSecondary(inst):
    return node_name in inst.secondary_nodes

  return _GetNodeInstancesInner(cfg, _IsSecondary)
1109
1110
def _GetStorageTypeArgs(cfg, storage_type):
  """Returns the arguments for a storage type.

  @param cfg: the configuration object, queried for the file storage
      directories
  @param storage_type: the storage type to build arguments for
  @rtype: list
  @return: for file storage, a one-element list holding the list of
      (regular and shared) file storage directories; an empty list for
      any other type

  """
  if storage_type != constants.ST_FILE:
    return []

  # Special case for file storage: storage.FileStorage wants a list of
  # storage directories
  storage_dirs = [cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]
  return [storage_dirs]
1121
1122
def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
  """Return the indices of an instance's faulty disks on a node.

  @param cfg: the configuration object, used to set the disk IDs for
      the given node
  @param rpc: the RPC runner used to query the mirror status
  @param instance: the instance whose disks are checked
  @param node_name: the node to query
  @param prereq: passed as C{prereq} to the RPC result's C{Raise}
  @rtype: list
  @return: indices (positions in the RPC payload) of the disks whose
      local disk status is C{constants.LDS_FAULTY}

  """
  for dev in instance.disks:
    cfg.SetDiskID(dev, node_name)

  result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
  result.Raise("Failed to get disk status from node %s" % node_name,
               prereq=prereq, ecode=errors.ECODE_ENVIRON)

  # entries without a status (false values) are not considered faulty
  return [idx for (idx, bdev_status) in enumerate(result.payload)
          if bdev_status and
             bdev_status.ldisk_status == constants.LDS_FAULTY]
1138
1139
1140 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1141   """Check the sanity of iallocator and node arguments and use the
1142   cluster-wide iallocator if appropriate.
1143
1144   Check that at most one of (iallocator, node) is specified. If none is
1145   specified, then the LU's opcode's iallocator slot is filled with the
1146   cluster-wide default iallocator.
1147
1148   @type iallocator_slot: string
1149   @param iallocator_slot: the name of the opcode iallocator slot
1150   @type node_slot: string
1151   @param node_slot: the name of the opcode target node slot
1152
1153   """
1154   node = getattr(lu.op, node_slot, None)
1155   iallocator = getattr(lu.op, iallocator_slot, None)
1156
1157   if node is not None and iallocator is not None:
1158     raise errors.OpPrereqError("Do not specify both, iallocator and node.",
1159                                errors.ECODE_INVAL)
1160   elif node is None and iallocator is None:
1161     default_iallocator = lu.cfg.GetDefaultIAllocator()
1162     if default_iallocator:
1163       setattr(lu.op, iallocator_slot, default_iallocator)
1164     else:
1165       raise errors.OpPrereqError("No iallocator or node given and no"
1166                                  " cluster-wide default iallocator found."
1167                                  " Please specify either an iallocator or a"
1168                                  " node, or set a cluster-wide default"
1169                                  " iallocator.")
1170
1171
class LUClusterPostInit(LogicalUnit):
  """Logical unit whose only purpose is running the cluster-init hooks.

  """
  HPATH = "cluster-init"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build the hooks environment.

    @rtype: dict
    @return: environment containing only C{OP_TARGET}, set to the
        cluster name

    """
    env = {}
    env["OP_TARGET"] = self.cfg.GetClusterName()
    return env

  def BuildHooksNodes(self):
    """Build the hooks node lists.

    @rtype: tuple
    @return: a pair of lists; the first is empty, the second holds only
        the master node

    """
    master = self.cfg.GetMasterNode()
    return ([], [master])

  def Exec(self, feedback_fn):
    """Nothing to do.

    @param feedback_fn: unused
    @return: always C{True}

    """
    return True
1198
1199
class LUClusterDestroy(LogicalUnit):
  """Logical unit for destroying the cluster.

  """
  HPATH = "cluster-destroy"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build the hooks environment.

    @rtype: dict
    @return: environment containing only C{OP_TARGET}, set to the
        cluster name

    """
    env = {}
    env["OP_TARGET"] = self.cfg.GetClusterName()
    return env

  def BuildHooksNodes(self):
    """Build the hooks node lists.

    @rtype: tuple
    @return: a pair of empty lists

    """
    return ([], [])

  def CheckPrereq(self):
    """Check prerequisites.

    This checks whether the cluster is empty: the master must be the
    only node left and no instances may be defined.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    master = self.cfg.GetMasterNode()

    nodelist = self.cfg.GetNodeList()
    only_master_left = len(nodelist) == 1 and nodelist[0] == master
    if not only_master_left:
      raise errors.OpPrereqError("There are still %d node(s) in"
                                 " this cluster." % (len(nodelist) - 1),
                                 errors.ECODE_INVAL)

    instancelist = self.cfg.GetInstanceList()
    if not instancelist:
      return

    raise errors.OpPrereqError("There are still %d instance(s) in"
                               " this cluster." % len(instancelist),
                               errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Destroys the cluster.

    @param feedback_fn: unused
    @return: the name of the master node

    """
    master = self.cfg.GetMasterNode()

    # Run post hooks on master node before it's removed
    _RunPostHook(self, master)

    stop_result = self.rpc.call_node_stop_master(master, False)
    stop_result.Raise("Could not disable the master role")

    return master
1255
1256
1257 def _VerifyCertificate(filename):
1258   """Verifies a certificate for LUClusterVerify.
1259
1260   @type filename: string
1261   @param filename: Path to PEM file
1262
1263   """
1264   try:
1265     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1266                                            utils.ReadFile(filename))
1267   except Exception, err: # pylint: disable-msg=W0703
1268     return (LUClusterVerify.ETYPE_ERROR,
1269             "Failed to load X509 certificate %s: %s" % (filename, err))
1270
1271   (errcode, msg) = \
1272     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1273                                 constants.SSL_CERT_EXPIRATION_ERROR)
1274
1275   if msg:
1276     fnamemsg = "While verifying %s: %s" % (filename, msg)
1277   else:
1278     fnamemsg = None
1279
1280   if errcode is None:
1281     return (None, fnamemsg)
1282   elif errcode == utils.CERT_WARNING:
1283     return (LUClusterVerify.ETYPE_WARNING, fnamemsg)
1284   elif errcode == utils.CERT_ERROR:
1285     return (LUClusterVerify.ETYPE_ERROR, fnamemsg)
1286
1287   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1288
1289
1290 class LUClusterVerify(LogicalUnit):
1291   """Verifies the cluster status.
1292
1293   """
  HPATH = "cluster-verify"
  HTYPE = constants.HTYPE_CLUSTER
  REQ_BGL = False

  # item types an error code can refer to
  TCLUSTER = "cluster"
  TNODE = "node"
  TINSTANCE = "instance"

  # error codes: each one is an (item type, code name) pair, which is how
  # _Error unpacks its "ecode" argument
  ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
  ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
  ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
  EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
  EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
  EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
  EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
  EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
  EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
  EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
  ENODEDRBD = (TNODE, "ENODEDRBD")
  ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
  ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
  ENODEHOOKS = (TNODE, "ENODEHOOKS")
  ENODEHV = (TNODE, "ENODEHV")
  ENODELVM = (TNODE, "ENODELVM")
  ENODEN1 = (TNODE, "ENODEN1")
  ENODENET = (TNODE, "ENODENET")
  ENODEOS = (TNODE, "ENODEOS")
  ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
  ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
  ENODERPC = (TNODE, "ENODERPC")
  ENODESSH = (TNODE, "ENODESSH")
  ENODEVERSION = (TNODE, "ENODEVERSION")
  ENODESETUP = (TNODE, "ENODESETUP")
  ENODETIME = (TNODE, "ENODETIME")
  ENODEOOBPATH = (TNODE, "ENODEOOBPATH")

  # ETYPE_FIELD is the keyword argument through which callers of
  # _Error/_ErrorIf select the severity; when it is not passed,
  # ETYPE_ERROR is assumed
  ETYPE_FIELD = "code"
  ETYPE_ERROR = "ERROR"
  ETYPE_WARNING = "WARNING"

  # with re.M, "^" matches (with zero width) at the start of every line
  _HOOKS_INDENT_RE = re.compile("^", re.M)
1335
1336   class NodeImage(object):
1337     """A class representing the logical and physical status of a node.
1338
1339     @type name: string
1340     @ivar name: the node name to which this object refers
1341     @ivar volumes: a structure as returned from
1342         L{ganeti.backend.GetVolumeList} (runtime)
1343     @ivar instances: a list of running instances (runtime)
1344     @ivar pinst: list of configured primary instances (config)
1345     @ivar sinst: list of configured secondary instances (config)
1346     @ivar sbp: dictionary of {primary-node: list of instances} for all
1347         instances for which this node is secondary (config)
1348     @ivar mfree: free memory, as reported by hypervisor (runtime)
1349     @ivar dfree: free disk, as reported by the node (runtime)
1350     @ivar offline: the offline status (config)
1351     @type rpc_fail: boolean
1352     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1353         not whether the individual keys were correct) (runtime)
1354     @type lvm_fail: boolean
1355     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1356     @type hyp_fail: boolean
1357     @ivar hyp_fail: whether the RPC call didn't return the instance list
1358     @type ghost: boolean
1359     @ivar ghost: whether this is a known node or not (config)
1360     @type os_fail: boolean
1361     @ivar os_fail: whether the RPC call didn't return valid OS data
1362     @type oslist: list
1363     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1364     @type vm_capable: boolean
1365     @ivar vm_capable: whether the node can host instances
1366
1367     """
1368     def __init__(self, offline=False, name=None, vm_capable=True):
1369       self.name = name
1370       self.volumes = {}
1371       self.instances = []
1372       self.pinst = []
1373       self.sinst = []
1374       self.sbp = {}
1375       self.mfree = 0
1376       self.dfree = 0
1377       self.offline = offline
1378       self.vm_capable = vm_capable
1379       self.rpc_fail = False
1380       self.lvm_fail = False
1381       self.hyp_fail = False
1382       self.ghost = False
1383       self.os_fail = False
1384       self.oslist = {}
1385
1386   def ExpandNames(self):
1387     self.needed_locks = {
1388       locking.LEVEL_NODE: locking.ALL_SET,
1389       locking.LEVEL_INSTANCE: locking.ALL_SET,
1390     }
1391     self.share_locks = dict.fromkeys(locking.LEVELS, 1)
1392
1393   def _Error(self, ecode, item, msg, *args, **kwargs):
1394     """Format an error message.
1395
1396     Based on the opcode's error_codes parameter, either format a
1397     parseable error code, or a simpler error string.
1398
1399     This must be called only from Exec and functions called from Exec.
1400
1401     """
1402     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1403     itype, etxt = ecode
1404     # first complete the msg
1405     if args:
1406       msg = msg % args
1407     # then format the whole message
1408     if self.op.error_codes:
1409       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1410     else:
1411       if item:
1412         item = " " + item
1413       else:
1414         item = ""
1415       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1416     # and finally report it via the feedback_fn
1417     self._feedback_fn("  - %s" % msg)
1418
1419   def _ErrorIf(self, cond, *args, **kwargs):
1420     """Log an error message if the passed condition is True.
1421
1422     """
1423     cond = bool(cond) or self.op.debug_simulate_errors
1424     if cond:
1425       self._Error(*args, **kwargs)
1426     # do not mark the operation as failed for WARN cases only
1427     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1428       self.bad = self.bad or cond
1429
1430   def _VerifyNode(self, ninfo, nresult):
1431     """Perform some basic validation on data returned from a node.
1432
1433       - check the result data structure is well formed and has all the
1434         mandatory fields
1435       - check ganeti version
1436
1437     @type ninfo: L{objects.Node}
1438     @param ninfo: the node to check
1439     @param nresult: the results from the node
1440     @rtype: boolean
1441     @return: whether overall this call was successful (and we can expect
1442          reasonable values in the respose)
1443
1444     """
1445     node = ninfo.name
1446     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1447
1448     # main result, nresult should be a non-empty dict
1449     test = not nresult or not isinstance(nresult, dict)
1450     _ErrorIf(test, self.ENODERPC, node,
1451                   "unable to verify node: no data returned")
1452     if test:
1453       return False
1454
1455     # compares ganeti version
1456     local_version = constants.PROTOCOL_VERSION
1457     remote_version = nresult.get("version", None)
1458     test = not (remote_version and
1459                 isinstance(remote_version, (list, tuple)) and
1460                 len(remote_version) == 2)
1461     _ErrorIf(test, self.ENODERPC, node,
1462              "connection to node returned invalid data")
1463     if test:
1464       return False
1465
1466     test = local_version != remote_version[0]
1467     _ErrorIf(test, self.ENODEVERSION, node,
1468              "incompatible protocol versions: master %s,"
1469              " node %s", local_version, remote_version[0])
1470     if test:
1471       return False
1472
1473     # node seems compatible, we can actually try to look into its results
1474
1475     # full package version
1476     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1477                   self.ENODEVERSION, node,
1478                   "software version mismatch: master %s, node %s",
1479                   constants.RELEASE_VERSION, remote_version[1],
1480                   code=self.ETYPE_WARNING)
1481
1482     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1483     if ninfo.vm_capable and isinstance(hyp_result, dict):
1484       for hv_name, hv_result in hyp_result.iteritems():
1485         test = hv_result is not None
1486         _ErrorIf(test, self.ENODEHV, node,
1487                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1488
1489     hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1490     if ninfo.vm_capable and isinstance(hvp_result, list):
1491       for item, hv_name, hv_result in hvp_result:
1492         _ErrorIf(True, self.ENODEHV, node,
1493                  "hypervisor %s parameter verify failure (source %s): %s",
1494                  hv_name, item, hv_result)
1495
1496     test = nresult.get(constants.NV_NODESETUP,
1497                        ["Missing NODESETUP results"])
1498     _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1499              "; ".join(test))
1500
1501     return True
1502
1503   def _VerifyNodeTime(self, ninfo, nresult,
1504                       nvinfo_starttime, nvinfo_endtime):
1505     """Check the node time.
1506
1507     @type ninfo: L{objects.Node}
1508     @param ninfo: the node to check
1509     @param nresult: the remote results for the node
1510     @param nvinfo_starttime: the start time of the RPC call
1511     @param nvinfo_endtime: the end time of the RPC call
1512
1513     """
1514     node = ninfo.name
1515     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1516
1517     ntime = nresult.get(constants.NV_TIME, None)
1518     try:
1519       ntime_merged = utils.MergeTime(ntime)
1520     except (ValueError, TypeError):
1521       _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1522       return
1523
1524     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1525       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1526     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1527       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1528     else:
1529       ntime_diff = None
1530
1531     _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1532              "Node time diverges by at least %s from master node time",
1533              ntime_diff)
1534
1535   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1536     """Check the node time.
1537
1538     @type ninfo: L{objects.Node}
1539     @param ninfo: the node to check
1540     @param nresult: the remote results for the node
1541     @param vg_name: the configured VG name
1542
1543     """
1544     if vg_name is None:
1545       return
1546
1547     node = ninfo.name
1548     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1549
1550     # checks vg existence and size > 20G
1551     vglist = nresult.get(constants.NV_VGLIST, None)
1552     test = not vglist
1553     _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1554     if not test:
1555       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1556                                             constants.MIN_VG_SIZE)
1557       _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1558
1559     # check pv names
1560     pvlist = nresult.get(constants.NV_PVLIST, None)
1561     test = pvlist is None
1562     _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1563     if not test:
1564       # check that ':' is not present in PV names, since it's a
1565       # special character for lvcreate (denotes the range of PEs to
1566       # use on the PV)
1567       for _, pvname, owner_vg in pvlist:
1568         test = ":" in pvname
1569         _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1570                  " '%s' of VG '%s'", pvname, owner_vg)
1571
1572   def _VerifyNodeNetwork(self, ninfo, nresult):
1573     """Check the node time.
1574
1575     @type ninfo: L{objects.Node}
1576     @param ninfo: the node to check
1577     @param nresult: the remote results for the node
1578
1579     """
1580     node = ninfo.name
1581     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1582
1583     test = constants.NV_NODELIST not in nresult
1584     _ErrorIf(test, self.ENODESSH, node,
1585              "node hasn't returned node ssh connectivity data")
1586     if not test:
1587       if nresult[constants.NV_NODELIST]:
1588         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1589           _ErrorIf(True, self.ENODESSH, node,
1590                    "ssh communication with node '%s': %s", a_node, a_msg)
1591
1592     test = constants.NV_NODENETTEST not in nresult
1593     _ErrorIf(test, self.ENODENET, node,
1594              "node hasn't returned node tcp connectivity data")
1595     if not test:
1596       if nresult[constants.NV_NODENETTEST]:
1597         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1598         for anode in nlist:
1599           _ErrorIf(True, self.ENODENET, node,
1600                    "tcp communication with node '%s': %s",
1601                    anode, nresult[constants.NV_NODENETTEST][anode])
1602
1603     test = constants.NV_MASTERIP not in nresult
1604     _ErrorIf(test, self.ENODENET, node,
1605              "node hasn't returned node master IP reachability data")
1606     if not test:
1607       if not nresult[constants.NV_MASTERIP]:
1608         if node == self.master_node:
1609           msg = "the master node cannot reach the master IP (not configured?)"
1610         else:
1611           msg = "cannot reach the master IP"
1612         _ErrorIf(True, self.ENODENET, node, msg)
1613
1614   def _VerifyInstance(self, instance, instanceconfig, node_image,
1615                       diskstatus):
1616     """Verify an instance.
1617
1618     This function checks to see if the required block devices are
1619     available on the instance's node.
1620
1621     """
1622     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1623     node_current = instanceconfig.primary_node
1624
1625     node_vol_should = {}
1626     instanceconfig.MapLVsByNode(node_vol_should)
1627
1628     for node in node_vol_should:
1629       n_img = node_image[node]
1630       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1631         # ignore missing volumes on offline or broken nodes
1632         continue
1633       for volume in node_vol_should[node]:
1634         test = volume not in n_img.volumes
1635         _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1636                  "volume %s missing on node %s", volume, node)
1637
1638     if instanceconfig.admin_up:
1639       pri_img = node_image[node_current]
1640       test = instance not in pri_img.instances and not pri_img.offline
1641       _ErrorIf(test, self.EINSTANCEDOWN, instance,
1642                "instance not running on its primary node %s",
1643                node_current)
1644
1645     for node, n_img in node_image.items():
1646       if node != node_current:
1647         test = instance in n_img.instances
1648         _ErrorIf(test, self.EINSTANCEWRONGNODE, instance,
1649                  "instance should not run on node %s", node)
1650
1651     diskdata = [(nname, success, status, idx)
1652                 for (nname, disks) in diskstatus.items()
1653                 for idx, (success, status) in enumerate(disks)]
1654
1655     for nname, success, bdev_status, idx in diskdata:
1656       # the 'ghost node' construction in Exec() ensures that we have a
1657       # node here
1658       snode = node_image[nname]
1659       bad_snode = snode.ghost or snode.offline
1660       _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
1661                self.EINSTANCEFAULTYDISK, instance,
1662                "couldn't retrieve status for disk/%s on %s: %s",
1663                idx, nname, bdev_status)
1664       _ErrorIf((instanceconfig.admin_up and success and
1665                 bdev_status.ldisk_status == constants.LDS_FAULTY),
1666                self.EINSTANCEFAULTYDISK, instance,
1667                "disk/%s on %s is faulty", idx, nname)
1668
1669   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1670     """Verify if there are any unknown volumes in the cluster.
1671
1672     The .os, .swap and backup volumes are ignored. All other volumes are
1673     reported as unknown.
1674
1675     @type reserved: L{ganeti.utils.FieldSet}
1676     @param reserved: a FieldSet of reserved volume names
1677
1678     """
1679     for node, n_img in node_image.items():
1680       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1681         # skip non-healthy nodes
1682         continue
1683       for volume in n_img.volumes:
1684         test = ((node not in node_vol_should or
1685                 volume not in node_vol_should[node]) and
1686                 not reserved.Matches(volume))
1687         self._ErrorIf(test, self.ENODEORPHANLV, node,
1688                       "volume %s is unknown", volume)
1689
1690   def _VerifyOrphanInstances(self, instancelist, node_image):
1691     """Verify the list of running instances.
1692
1693     This checks what instances are running but unknown to the cluster.
1694
1695     """
1696     for node, n_img in node_image.items():
1697       for o_inst in n_img.instances:
1698         test = o_inst not in instancelist
1699         self._ErrorIf(test, self.ENODEORPHANINSTANCE, node,
1700                       "instance %s on node %s should not exist", o_inst, node)
1701
1702   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1703     """Verify N+1 Memory Resilience.
1704
1705     Check that if one single node dies we can still start all the
1706     instances it was primary for.
1707
1708     """
1709     cluster_info = self.cfg.GetClusterInfo()
1710     for node, n_img in node_image.items():
1711       # This code checks that every node which is now listed as
1712       # secondary has enough memory to host all instances it is
1713       # supposed to should a single other node in the cluster fail.
1714       # FIXME: not ready for failover to an arbitrary node
1715       # FIXME: does not support file-backed instances
1716       # WARNING: we currently take into account down instances as well
1717       # as up ones, considering that even if they're down someone
1718       # might want to start them even in the event of a node failure.
1719       if n_img.offline:
1720         # we're skipping offline nodes from the N+1 warning, since
1721         # most likely we don't have good memory infromation from them;
1722         # we already list instances living on such nodes, and that's
1723         # enough warning
1724         continue
1725       for prinode, instances in n_img.sbp.items():
1726         needed_mem = 0
1727         for instance in instances:
1728           bep = cluster_info.FillBE(instance_cfg[instance])
1729           if bep[constants.BE_AUTO_BALANCE]:
1730             needed_mem += bep[constants.BE_MEMORY]
1731         test = n_img.mfree < needed_mem
1732         self._ErrorIf(test, self.ENODEN1, node,
1733                       "not enough memory to accomodate instance failovers"
1734                       " should node %s fail (%dMiB needed, %dMiB available)",
1735                       prinode, needed_mem, n_img.mfree)
1736
1737   @classmethod
1738   def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
1739                    (files_all, files_all_opt, files_mc, files_vm)):
1740     """Verifies file checksums collected from all nodes.
1741
1742     @param errorif: Callback for reporting errors
1743     @param nodeinfo: List of L{objects.Node} objects
1744     @param master_node: Name of master node
1745     @param all_nvinfo: RPC results
1746
1747     """
1748     node_names = frozenset(node.name for node in nodeinfo)
1749
1750     assert master_node in node_names
1751     assert (len(files_all | files_all_opt | files_mc | files_vm) ==
1752             sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
1753            "Found file listed in more than one file list"
1754
1755     # Define functions determining which nodes to consider for a file
1756     file2nodefn = dict([(filename, fn)
1757       for (files, fn) in [(files_all, None),
1758                           (files_all_opt, None),
1759                           (files_mc, lambda node: (node.master_candidate or
1760                                                    node.name == master_node)),
1761                           (files_vm, lambda node: node.vm_capable)]
1762       for filename in files])
1763
1764     fileinfo = dict((filename, {}) for filename in file2nodefn.keys())
1765
1766     for node in nodeinfo:
1767       nresult = all_nvinfo[node.name]
1768
1769       if nresult.fail_msg or not nresult.payload:
1770         node_files = None
1771       else:
1772         node_files = nresult.payload.get(constants.NV_FILELIST, None)
1773
1774       test = not (node_files and isinstance(node_files, dict))
1775       errorif(test, cls.ENODEFILECHECK, node.name,
1776               "Node did not return file checksum data")
1777       if test:
1778         continue
1779
1780       for (filename, checksum) in node_files.items():
1781         # Check if the file should be considered for a node
1782         fn = file2nodefn[filename]
1783         if fn is None or fn(node):
1784           fileinfo[filename].setdefault(checksum, set()).add(node.name)
1785
1786     for (filename, checksums) in fileinfo.items():
1787       assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
1788
1789       # Nodes having the file
1790       with_file = frozenset(node_name
1791                             for nodes in fileinfo[filename].values()
1792                             for node_name in nodes)
1793
1794       # Nodes missing file
1795       missing_file = node_names - with_file
1796
1797       if filename in files_all_opt:
1798         # All or no nodes
1799         errorif(missing_file and missing_file != node_names,
1800                 cls.ECLUSTERFILECHECK, None,
1801                 "File %s is optional, but it must exist on all or no nodes (not"
1802                 " found on %s)",
1803                 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
1804       else:
1805         errorif(missing_file, cls.ECLUSTERFILECHECK, None,
1806                 "File %s is missing from node(s) %s", filename,
1807                 utils.CommaJoin(utils.NiceSort(missing_file)))
1808
1809       # See if there are multiple versions of the file
1810       test = len(checksums) > 1
1811       if test:
1812         variants = ["variant %s on %s" %
1813                     (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
1814                     for (idx, (checksum, nodes)) in
1815                       enumerate(sorted(checksums.items()))]
1816       else:
1817         variants = []
1818
1819       errorif(test, cls.ECLUSTERFILECHECK, None,
1820               "File %s found with %s different checksums (%s)",
1821               filename, len(checksums), "; ".join(variants))
1822
1823   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
1824                       drbd_map):
1825     """Verifies and the node DRBD status.
1826
1827     @type ninfo: L{objects.Node}
1828     @param ninfo: the node to check
1829     @param nresult: the remote results for the node
1830     @param instanceinfo: the dict of instances
1831     @param drbd_helper: the configured DRBD usermode helper
1832     @param drbd_map: the DRBD map as returned by
1833         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
1834
1835     """
1836     node = ninfo.name
1837     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1838
1839     if drbd_helper:
1840       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
1841       test = (helper_result == None)
1842       _ErrorIf(test, self.ENODEDRBDHELPER, node,
1843                "no drbd usermode helper returned")
1844       if helper_result:
1845         status, payload = helper_result
1846         test = not status
1847         _ErrorIf(test, self.ENODEDRBDHELPER, node,
1848                  "drbd usermode helper check unsuccessful: %s", payload)
1849         test = status and (payload != drbd_helper)
1850         _ErrorIf(test, self.ENODEDRBDHELPER, node,
1851                  "wrong drbd usermode helper: %s", payload)
1852
1853     # compute the DRBD minors
1854     node_drbd = {}
1855     for minor, instance in drbd_map[node].items():
1856       test = instance not in instanceinfo
1857       _ErrorIf(test, self.ECLUSTERCFG, None,
1858                "ghost instance '%s' in temporary DRBD map", instance)
1859         # ghost instance should not be running, but otherwise we
1860         # don't give double warnings (both ghost instance and
1861         # unallocated minor in use)
1862       if test:
1863         node_drbd[minor] = (instance, False)
1864       else:
1865         instance = instanceinfo[instance]
1866         node_drbd[minor] = (instance.name, instance.admin_up)
1867
1868     # and now check them
1869     used_minors = nresult.get(constants.NV_DRBDLIST, [])
1870     test = not isinstance(used_minors, (tuple, list))
1871     _ErrorIf(test, self.ENODEDRBD, node,
1872              "cannot parse drbd status file: %s", str(used_minors))
1873     if test:
1874       # we cannot check drbd status
1875       return
1876
1877     for minor, (iname, must_exist) in node_drbd.items():
1878       test = minor not in used_minors and must_exist
1879       _ErrorIf(test, self.ENODEDRBD, node,
1880                "drbd minor %d of instance %s is not active", minor, iname)
1881     for minor in used_minors:
1882       test = minor not in node_drbd
1883       _ErrorIf(test, self.ENODEDRBD, node,
1884                "unallocated drbd minor %d is in use", minor)
1885
1886   def _UpdateNodeOS(self, ninfo, nresult, nimg):
1887     """Builds the node OS structures.
1888
1889     @type ninfo: L{objects.Node}
1890     @param ninfo: the node to check
1891     @param nresult: the remote results for the node
1892     @param nimg: the node image object
1893
1894     """
1895     node = ninfo.name
1896     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1897
1898     remote_os = nresult.get(constants.NV_OSLIST, None)
1899     test = (not isinstance(remote_os, list) or
1900             not compat.all(isinstance(v, list) and len(v) == 7
1901                            for v in remote_os))
1902
1903     _ErrorIf(test, self.ENODEOS, node,
1904              "node hasn't returned valid OS data")
1905
1906     nimg.os_fail = test
1907
1908     if test:
1909       return
1910
1911     os_dict = {}
1912
1913     for (name, os_path, status, diagnose,
1914          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
1915
1916       if name not in os_dict:
1917         os_dict[name] = []
1918
1919       # parameters is a list of lists instead of list of tuples due to
1920       # JSON lacking a real tuple type, fix it:
1921       parameters = [tuple(v) for v in parameters]
1922       os_dict[name].append((os_path, status, diagnose,
1923                             set(variants), set(parameters), set(api_ver)))
1924
1925     nimg.oslist = os_dict
1926
1927   def _VerifyNodeOS(self, ninfo, nimg, base):
1928     """Verifies the node OS list.
1929
1930     @type ninfo: L{objects.Node}
1931     @param ninfo: the node to check
1932     @param nimg: the node image object
1933     @param base: the 'template' node we match against (e.g. from the master)
1934
1935     """
1936     node = ninfo.name
1937     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1938
1939     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
1940
1941     beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
1942     for os_name, os_data in nimg.oslist.items():
1943       assert os_data, "Empty OS status for OS %s?!" % os_name
1944       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
1945       _ErrorIf(not f_status, self.ENODEOS, node,
1946                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
1947       _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
1948                "OS '%s' has multiple entries (first one shadows the rest): %s",
1949                os_name, utils.CommaJoin([v[0] for v in os_data]))
1950       # this will catched in backend too
1951       _ErrorIf(compat.any(v >= constants.OS_API_V15 for v in f_api)
1952                and not f_var, self.ENODEOS, node,
1953                "OS %s with API at least %d does not declare any variant",
1954                os_name, constants.OS_API_V15)
1955       # comparisons with the 'base' image
1956       test = os_name not in base.oslist
1957       _ErrorIf(test, self.ENODEOS, node,
1958                "Extra OS %s not present on reference node (%s)",
1959                os_name, base.name)
1960       if test:
1961         continue
1962       assert base.oslist[os_name], "Base node has empty OS status?"
1963       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
1964       if not b_status:
1965         # base OS is invalid, skipping
1966         continue
1967       for kind, a, b in [("API version", f_api, b_api),
1968                          ("variants list", f_var, b_var),
1969                          ("parameters", beautify_params(f_param),
1970                           beautify_params(b_param))]:
1971         _ErrorIf(a != b, self.ENODEOS, node,
1972                  "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
1973                  kind, os_name, base.name,
1974                  utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
1975
1976     # check any missing OSes
1977     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
1978     _ErrorIf(missing, self.ENODEOS, node,
1979              "OSes present on reference node %s but missing on this node: %s",
1980              base.name, utils.CommaJoin(missing))
1981
1982   def _VerifyOob(self, ninfo, nresult):
1983     """Verifies out of band functionality of a node.
1984
1985     @type ninfo: L{objects.Node}
1986     @param ninfo: the node to check
1987     @param nresult: the remote results for the node
1988
1989     """
1990     node = ninfo.name
1991     # We just have to verify the paths on master and/or master candidates
1992     # as the oob helper is invoked on the master
1993     if ((ninfo.master_candidate or ninfo.master_capable) and
1994         constants.NV_OOB_PATHS in nresult):
1995       for path_result in nresult[constants.NV_OOB_PATHS]:
1996         self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
1997
1998   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
1999     """Verifies and updates the node volume data.
2000
2001     This function will update a L{NodeImage}'s internal structures
2002     with data from the remote call.
2003
2004     @type ninfo: L{objects.Node}
2005     @param ninfo: the node to check
2006     @param nresult: the remote results for the node
2007     @param nimg: the node image object
2008     @param vg_name: the configured VG name
2009
2010     """
2011     node = ninfo.name
2012     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2013
2014     nimg.lvm_fail = True
2015     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2016     if vg_name is None:
2017       pass
2018     elif isinstance(lvdata, basestring):
2019       _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2020                utils.SafeEncode(lvdata))
2021     elif not isinstance(lvdata, dict):
2022       _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2023     else:
2024       nimg.volumes = lvdata
2025       nimg.lvm_fail = False
2026
2027   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2028     """Verifies and updates the node instance list.
2029
2030     If the listing was successful, then updates this node's instance
2031     list. Otherwise, it marks the RPC call as failed for the instance
2032     list key.
2033
2034     @type ninfo: L{objects.Node}
2035     @param ninfo: the node to check
2036     @param nresult: the remote results for the node
2037     @param nimg: the node image object
2038
2039     """
2040     idata = nresult.get(constants.NV_INSTANCELIST, None)
2041     test = not isinstance(idata, list)
2042     self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2043                   " (instancelist): %s", utils.SafeEncode(str(idata)))
2044     if test:
2045       nimg.hyp_fail = True
2046     else:
2047       nimg.instances = idata
2048
2049   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2050     """Verifies and computes a node information map
2051
2052     @type ninfo: L{objects.Node}
2053     @param ninfo: the node to check
2054     @param nresult: the remote results for the node
2055     @param nimg: the node image object
2056     @param vg_name: the configured VG name
2057
2058     """
2059     node = ninfo.name
2060     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2061
2062     # try to read free memory (from the hypervisor)
2063     hv_info = nresult.get(constants.NV_HVINFO, None)
2064     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2065     _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2066     if not test:
2067       try:
2068         nimg.mfree = int(hv_info["memory_free"])
2069       except (ValueError, TypeError):
2070         _ErrorIf(True, self.ENODERPC, node,
2071                  "node returned invalid nodeinfo, check hypervisor")
2072
2073     # FIXME: devise a free space model for file based instances as well
2074     if vg_name is not None:
2075       test = (constants.NV_VGLIST not in nresult or
2076               vg_name not in nresult[constants.NV_VGLIST])
2077       _ErrorIf(test, self.ENODELVM, node,
2078                "node didn't return data for the volume group '%s'"
2079                " - it is either missing or broken", vg_name)
2080       if not test:
2081         try:
2082           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2083         except (ValueError, TypeError):
2084           _ErrorIf(True, self.ENODERPC, node,
2085                    "node returned invalid LVM info, check LVM status")
2086
2087   def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2088     """Gets per-disk status information for all instances.
2089
2090     @type nodelist: list of strings
2091     @param nodelist: Node names
2092     @type node_image: dict of (name, L{objects.Node})
2093     @param node_image: Node objects
2094     @type instanceinfo: dict of (name, L{objects.Instance})
2095     @param instanceinfo: Instance objects
2096     @rtype: {instance: {node: [(succes, payload)]}}
2097     @return: a dictionary of per-instance dictionaries with nodes as
2098         keys and disk information as values; the disk information is a
2099         list of tuples (success, payload)
2100
2101     """
2102     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2103
2104     node_disks = {}
2105     node_disks_devonly = {}
2106     diskless_instances = set()
2107     diskless = constants.DT_DISKLESS
2108
2109     for nname in nodelist:
2110       node_instances = list(itertools.chain(node_image[nname].pinst,
2111                                             node_image[nname].sinst))
2112       diskless_instances.update(inst for inst in node_instances
2113                                 if instanceinfo[inst].disk_template == diskless)
2114       disks = [(inst, disk)
2115                for inst in node_instances
2116                for disk in instanceinfo[inst].disks]
2117
2118       if not disks:
2119         # No need to collect data
2120         continue
2121
2122       node_disks[nname] = disks
2123
2124       # Creating copies as SetDiskID below will modify the objects and that can
2125       # lead to incorrect data returned from nodes
2126       devonly = [dev.Copy() for (_, dev) in disks]
2127
2128       for dev in devonly:
2129         self.cfg.SetDiskID(dev, nname)
2130
2131       node_disks_devonly[nname] = devonly
2132
2133     assert len(node_disks) == len(node_disks_devonly)
2134
2135     # Collect data from all nodes with disks
2136     result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2137                                                           node_disks_devonly)
2138
2139     assert len(result) == len(node_disks)
2140
2141     instdisk = {}
2142
2143     for (nname, nres) in result.items():
2144       disks = node_disks[nname]
2145
2146       if nres.offline:
2147         # No data from this node
2148         data = len(disks) * [(False, "node offline")]
2149       else:
2150         msg = nres.fail_msg
2151         _ErrorIf(msg, self.ENODERPC, nname,
2152                  "while getting disk information: %s", msg)
2153         if msg:
2154           # No data from this node
2155           data = len(disks) * [(False, msg)]
2156         else:
2157           data = []
2158           for idx, i in enumerate(nres.payload):
2159             if isinstance(i, (tuple, list)) and len(i) == 2:
2160               data.append(i)
2161             else:
2162               logging.warning("Invalid result from node %s, entry %d: %s",
2163                               nname, idx, i)
2164               data.append((False, "Invalid result from the remote node"))
2165
2166       for ((inst, _), status) in zip(disks, data):
2167         instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2168
2169     # Add empty entries for diskless instances.
2170     for inst in diskless_instances:
2171       assert inst not in instdisk
2172       instdisk[inst] = {}
2173
2174     assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2175                       len(nnames) <= len(instanceinfo[inst].all_nodes) and
2176                       compat.all(isinstance(s, (tuple, list)) and
2177                                  len(s) == 2 for s in statuses)
2178                       for inst, nnames in instdisk.items()
2179                       for nname, statuses in nnames.items())
2180     assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2181
2182     return instdisk
2183
2184   def _VerifyHVP(self, hvp_data):
2185     """Verifies locally the syntax of the hypervisor parameters.
2186
2187     """
2188     for item, hv_name, hv_params in hvp_data:
2189       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
2190              (item, hv_name))
2191       try:
2192         hv_class = hypervisor.GetHypervisor(hv_name)
2193         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
2194         hv_class.CheckParameterSyntax(hv_params)
2195       except errors.GenericError, err:
2196         self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
2197
2198   def BuildHooksEnv(self):
2199     """Build hooks env.
2200
2201     Cluster-Verify hooks just ran in the post phase and their failure makes
2202     the output be logged in the verify output and the verification to fail.
2203
2204     """
2205     cfg = self.cfg
2206
2207     env = {
2208       "CLUSTER_TAGS": " ".join(cfg.GetClusterInfo().GetTags())
2209       }
2210
2211     env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2212                for node in cfg.GetAllNodesInfo().values())
2213
2214     return env
2215
2216   def BuildHooksNodes(self):
2217     """Build hooks nodes.
2218
2219     """
2220     return ([], self.cfg.GetNodeList())
2221
2222   def Exec(self, feedback_fn):
2223     """Verify integrity of cluster, performing various test on nodes.
2224
2225     """
2226     # This method has too many local variables. pylint: disable-msg=R0914
2227     self.bad = False
2228     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2229     verbose = self.op.verbose
2230     self._feedback_fn = feedback_fn
2231     feedback_fn("* Verifying global settings")
2232     for msg in self.cfg.VerifyConfig():
2233       _ErrorIf(True, self.ECLUSTERCFG, None, msg)
2234
2235     # Check the cluster certificates
2236     for cert_filename in constants.ALL_CERT_FILES:
2237       (errcode, msg) = _VerifyCertificate(cert_filename)
2238       _ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
2239
2240     vg_name = self.cfg.GetVGName()
2241     drbd_helper = self.cfg.GetDRBDHelper()
2242     hypervisors = self.cfg.GetClusterInfo().enabled_hypervisors
2243     cluster = self.cfg.GetClusterInfo()
2244     nodelist = utils.NiceSort(self.cfg.GetNodeList())
2245     nodeinfo = [self.cfg.GetNodeInfo(nname) for nname in nodelist]
2246     nodeinfo_byname = dict(zip(nodelist, nodeinfo))
2247     instancelist = utils.NiceSort(self.cfg.GetInstanceList())
2248     instanceinfo = dict((iname, self.cfg.GetInstanceInfo(iname))
2249                         for iname in instancelist)
2250     groupinfo = self.cfg.GetAllNodeGroupsInfo()
2251     i_non_redundant = [] # Non redundant instances
2252     i_non_a_balanced = [] # Non auto-balanced instances
2253     n_offline = 0 # Count of offline nodes
2254     n_drained = 0 # Count of nodes being drained
2255     node_vol_should = {}
2256
2257     # FIXME: verify OS list
2258
2259     # File verification
2260     filemap = _ComputeAncillaryFiles(cluster, False)
2261
2262     # do local checksums
2263     master_node = self.master_node = self.cfg.GetMasterNode()
2264     master_ip = self.cfg.GetMasterIP()
2265
2266     # Compute the set of hypervisor parameters
2267     hvp_data = []
2268     for hv_name in hypervisors:
2269       hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
2270     for os_name, os_hvp in cluster.os_hvp.items():
2271       for hv_name, hv_params in os_hvp.items():
2272         if not hv_params:
2273           continue
2274         full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
2275         hvp_data.append(("os %s" % os_name, hv_name, full_params))
2276     # TODO: collapse identical parameter values in a single one
2277     for instance in instanceinfo.values():
2278       if not instance.hvparams:
2279         continue
2280       hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
2281                        cluster.FillHV(instance)))
2282     # and verify them locally
2283     self._VerifyHVP(hvp_data)
2284
2285     feedback_fn("* Gathering data (%d nodes)" % len(nodelist))
2286     node_verify_param = {
2287       constants.NV_FILELIST:
2288         utils.UniqueSequence(filename
2289                              for files in filemap
2290                              for filename in files),
2291       constants.NV_NODELIST: [node.name for node in nodeinfo
2292                               if not node.offline],
2293       constants.NV_HYPERVISOR: hypervisors,
2294       constants.NV_HVPARAMS: hvp_data,
2295       constants.NV_NODENETTEST: [(node.name, node.primary_ip,
2296                                   node.secondary_ip) for node in nodeinfo
2297                                  if not node.offline],
2298       constants.NV_INSTANCELIST: hypervisors,
2299       constants.NV_VERSION: None,
2300       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2301       constants.NV_NODESETUP: None,
2302       constants.NV_TIME: None,
2303       constants.NV_MASTERIP: (master_node, master_ip),
2304       constants.NV_OSLIST: None,
2305       constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2306       }
2307
2308     if vg_name is not None:
2309       node_verify_param[constants.NV_VGLIST] = None
2310       node_verify_param[constants.NV_LVLIST] = vg_name
2311       node_verify_param[constants.NV_PVLIST] = [vg_name]
2312       node_verify_param[constants.NV_DRBDLIST] = None
2313
2314     if drbd_helper:
2315       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2316
2317     # Build our expected cluster state
2318     node_image = dict((node.name, self.NodeImage(offline=node.offline,
2319                                                  name=node.name,
2320                                                  vm_capable=node.vm_capable))
2321                       for node in nodeinfo)
2322
2323     # Gather OOB paths
2324     oob_paths = []
2325     for node in nodeinfo:
2326       path = _SupportsOob(self.cfg, node)
2327       if path and path not in oob_paths:
2328         oob_paths.append(path)
2329
2330     if oob_paths:
2331       node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2332
2333     for instance in instancelist:
2334       inst_config = instanceinfo[instance]
2335
2336       for nname in inst_config.all_nodes:
2337         if nname not in node_image:
2338           # ghost node
2339           gnode = self.NodeImage(name=nname)
2340           gnode.ghost = True
2341           node_image[nname] = gnode
2342
2343       inst_config.MapLVsByNode(node_vol_should)
2344
2345       pnode = inst_config.primary_node
2346       node_image[pnode].pinst.append(instance)
2347
2348       for snode in inst_config.secondary_nodes:
2349         nimg = node_image[snode]
2350         nimg.sinst.append(instance)
2351         if pnode not in nimg.sbp:
2352           nimg.sbp[pnode] = []
2353         nimg.sbp[pnode].append(instance)
2354
2355     # At this point, we have the in-memory data structures complete,
2356     # except for the runtime information, which we'll gather next
2357
2358     # Due to the way our RPC system works, exact response times cannot be
2359     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2360     # time before and after executing the request, we can at least have a time
2361     # window.
2362     nvinfo_starttime = time.time()
2363     all_nvinfo = self.rpc.call_node_verify(nodelist, node_verify_param,
2364                                            self.cfg.GetClusterName())
2365     nvinfo_endtime = time.time()
2366
2367     all_drbd_map = self.cfg.ComputeDRBDMap()
2368
2369     feedback_fn("* Gathering disk information (%s nodes)" % len(nodelist))
2370     instdisk = self._CollectDiskInfo(nodelist, node_image, instanceinfo)
2371
2372     feedback_fn("* Verifying configuration file consistency")
2373     self._VerifyFiles(_ErrorIf, nodeinfo, master_node, all_nvinfo, filemap)
2374
2375     feedback_fn("* Verifying node status")
2376
2377     refos_img = None
2378
2379     for node_i in nodeinfo:
2380       node = node_i.name
2381       nimg = node_image[node]
2382
2383       if node_i.offline:
2384         if verbose:
2385           feedback_fn("* Skipping offline node %s" % (node,))
2386         n_offline += 1
2387         continue
2388
2389       if node == master_node:
2390         ntype = "master"
2391       elif node_i.master_candidate:
2392         ntype = "master candidate"
2393       elif node_i.drained:
2394         ntype = "drained"
2395         n_drained += 1
2396       else:
2397         ntype = "regular"
2398       if verbose:
2399         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2400
2401       msg = all_nvinfo[node].fail_msg
2402       _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2403       if msg:
2404         nimg.rpc_fail = True
2405         continue
2406
2407       nresult = all_nvinfo[node].payload
2408
2409       nimg.call_ok = self._VerifyNode(node_i, nresult)
2410       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2411       self._VerifyNodeNetwork(node_i, nresult)
2412       self._VerifyOob(node_i, nresult)
2413
2414       if nimg.vm_capable:
2415         self._VerifyNodeLVM(node_i, nresult, vg_name)
2416         self._VerifyNodeDrbd(node_i, nresult, instanceinfo, drbd_helper,
2417                              all_drbd_map)
2418
2419         self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2420         self._UpdateNodeInstances(node_i, nresult, nimg)
2421         self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2422         self._UpdateNodeOS(node_i, nresult, nimg)
2423         if not nimg.os_fail:
2424           if refos_img is None:
2425             refos_img = nimg
2426           self._VerifyNodeOS(node_i, nimg, refos_img)
2427
2428     feedback_fn("* Verifying instance status")
2429     for instance in instancelist:
2430       if verbose:
2431         feedback_fn("* Verifying instance %s" % instance)
2432       inst_config = instanceinfo[instance]
2433       self._VerifyInstance(instance, inst_config, node_image,
2434                            instdisk[instance])
2435       inst_nodes_offline = []
2436
2437       pnode = inst_config.primary_node
2438       pnode_img = node_image[pnode]
2439       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2440                self.ENODERPC, pnode, "instance %s, connection to"
2441                " primary node failed", instance)
2442
2443       _ErrorIf(inst_config.admin_up and pnode_img.offline,
2444                self.EINSTANCEBADNODE, instance,
2445                "instance is marked as running and lives on offline node %s",
2446                inst_config.primary_node)
2447
2448       # If the instance is non-redundant we cannot survive losing its primary
2449       # node, so we are not N+1 compliant. On the other hand we have no disk
2450       # templates with more than one secondary so that situation is not well
2451       # supported either.
2452       # FIXME: does not support file-backed instances
2453       if not inst_config.secondary_nodes:
2454         i_non_redundant.append(instance)
2455
2456       _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2457                instance, "instance has multiple secondary nodes: %s",
2458                utils.CommaJoin(inst_config.secondary_nodes),
2459                code=self.ETYPE_WARNING)
2460
2461       if inst_config.disk_template in constants.DTS_INT_MIRROR:
2462         pnode = inst_config.primary_node
2463         instance_nodes = utils.NiceSort(inst_config.all_nodes)
2464         instance_groups = {}
2465
2466         for node in instance_nodes:
2467           instance_groups.setdefault(nodeinfo_byname[node].group,
2468                                      []).append(node)
2469
2470         pretty_list = [
2471           "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2472           # Sort so that we always list the primary node first.
2473           for group, nodes in sorted(instance_groups.items(),
2474                                      key=lambda (_, nodes): pnode in nodes,
2475                                      reverse=True)]
2476
2477         self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2478                       instance, "instance has primary and secondary nodes in"
2479                       " different groups: %s", utils.CommaJoin(pretty_list),
2480                       code=self.ETYPE_WARNING)
2481
2482       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2483         i_non_a_balanced.append(instance)
2484
2485       for snode in inst_config.secondary_nodes:
2486         s_img = node_image[snode]
2487         _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2488                  "instance %s, connection to secondary node failed", instance)
2489
2490         if s_img.offline:
2491           inst_nodes_offline.append(snode)
2492
2493       # warn that the instance lives on offline nodes
2494       _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2495                "instance has offline secondary node(s) %s",
2496                utils.CommaJoin(inst_nodes_offline))
2497       # ... or ghost/non-vm_capable nodes
2498       for node in inst_config.all_nodes:
2499         _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2500                  "instance lives on ghost node %s", node)
2501         _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2502                  instance, "instance lives on non-vm_capable node %s", node)
2503
2504     feedback_fn("* Verifying orphan volumes")
2505     reserved = utils.FieldSet(*cluster.reserved_lvs)
2506     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2507
2508     feedback_fn("* Verifying orphan instances")
2509     self._VerifyOrphanInstances(instancelist, node_image)
2510
2511     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2512       feedback_fn("* Verifying N+1 Memory redundancy")
2513       self._VerifyNPlusOneMemory(node_image, instanceinfo)
2514
2515     feedback_fn("* Other Notes")
2516     if i_non_redundant:
2517       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2518                   % len(i_non_redundant))
2519
2520     if i_non_a_balanced:
2521       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2522                   % len(i_non_a_balanced))
2523
2524     if n_offline:
2525       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2526
2527     if n_drained:
2528       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2529
2530     return not self.bad
2531
2532   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2533     """Analyze the post-hooks' result
2534
2535     This method analyses the hook result, handles it, and sends some
2536     nicely-formatted feedback back to the user.
2537
2538     @param phase: one of L{constants.HOOKS_PHASE_POST} or
2539         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2540     @param hooks_results: the results of the multi-node hooks rpc call
2541     @param feedback_fn: function used send feedback back to the caller
2542     @param lu_result: previous Exec result
2543     @return: the new Exec result, based on the previous result
2544         and hook results
2545
2546     """
2547     # We only really run POST phase hooks, and are only interested in
2548     # their results
2549     if phase == constants.HOOKS_PHASE_POST:
2550       # Used to change hooks' output to proper indentation
2551       feedback_fn("* Hooks Results")
2552       assert hooks_results, "invalid result from hooks"
2553
2554       for node_name in hooks_results:
2555         res = hooks_results[node_name]
2556         msg = res.fail_msg
2557         test = msg and not res.offline
2558         self._ErrorIf(test, self.ENODEHOOKS, node_name,
2559                       "Communication failure in hooks execution: %s", msg)
2560         if res.offline or msg:
2561           # No need to investigate payload if node is offline or gave an error.
2562           # override manually lu_result here as _ErrorIf only
2563           # overrides self.bad
2564           lu_result = 1
2565           continue
2566         for script, hkr, output in res.payload:
2567           test = hkr == constants.HKR_FAIL
2568           self._ErrorIf(test, self.ENODEHOOKS, node_name,
2569                         "Script %s failed, output:", script)
2570           if test:
2571             output = self._HOOKS_INDENT_RE.sub('      ', output)
2572             feedback_fn("%s" % output)
2573             lu_result = 0
2574
2575       return lu_result
2576
2577
class LUClusterVerifyDisks(NoHooksLU):
  """Verifies the cluster disks status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Request all node and instance locks, shared at every level.

    """
    self.needed_locks = {
      locking.LEVEL_INSTANCE: locking.ALL_SET,
      locking.LEVEL_NODE: locking.ALL_SET,
    }
    self.share_locks = dict((level, 1) for level in locking.LEVELS)

  def Exec(self, feedback_fn):
    """Verify integrity of cluster disks.

    Only instances marked as administratively up are looked at.

    @rtype: tuple of three items
    @return: a tuple of (dict of node-to-node_error, list of instances
        which need activate-disks, dict of instance: (node, volume) for
        missing volumes)

    """
    res_nodes = {}
    res_instances = []
    res_missing = {}
    result = (res_nodes, res_instances, res_missing)

    nodes = utils.NiceSort(self.cfg.GetVmCapableNodeList())
    instances = self.cfg.GetAllInstancesInfo().values()

    # Flatten {node: [vol, ...]} of every running instance into a single
    # {(node, vol): instance} lookup table
    lv_owner = {}
    for inst in instances:
      if not inst.admin_up:
        continue
      lvs_by_node = {}
      inst.MapLVsByNode(lvs_by_node)
      for node, volumes in lvs_by_node.iteritems():
        for vol in volumes:
          lv_owner[(node, vol)] = inst

    if not lv_owner:
      return result

    node_lvs = self.rpc.call_lv_list(nodes, [])
    for node, node_res in node_lvs.items():
      if node_res.offline:
        continue

      msg = node_res.fail_msg
      if msg:
        logging.warning("Error enumerating LVs on node %s: %s", node, msg)
        res_nodes[node] = msg
        continue

      for lv_name, lv_data in node_res.payload.items():
        (_, _, lv_online) = lv_data
        # every volume reported by the node is dropped from the table, so
        # that only unreported volumes are left at the end
        owner = lv_owner.pop((node, lv_name), None)
        if lv_online or owner is None:
          continue
        if owner.name not in res_instances:
          res_instances.append(owner.name)

    # whatever is still in the table was not reported by any queried
    # node; group these (node, volume) pairs per instance name
    for key, inst in lv_owner.iteritems():
      res_missing.setdefault(inst.name, []).append(key)

    return result
2644
2645
class LUClusterRepairDiskSizes(NoHooksLU):
  """Verifies the cluster disks sizes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Compute the needed locks.

    With an explicit instance list only those instances are locked (the
    node locks are recalculated later); otherwise all nodes and
    instances are locked. All locks are shared.

    """
    if self.op.instances:
      self.wanted_names = [_ExpandInstanceName(self.cfg, name)
                           for name in self.op.instances]
      self.needed_locks = {
        locking.LEVEL_INSTANCE: self.wanted_names,
        locking.LEVEL_NODE: [],
        }
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    else:
      self.wanted_names = None
      self.needed_locks = {
        locking.LEVEL_INSTANCE: locking.ALL_SET,
        locking.LEVEL_NODE: locking.ALL_SET,
        }
    self.share_locks = dict.fromkeys(locking.LEVELS, 1)

  def DeclareLocks(self, level):
    """Lock the primary nodes of the explicitly requested instances.

    """
    if level == locking.LEVEL_NODE:
      if self.wanted_names is not None:
        self._LockInstancesNodes(primary_only=True)

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    """
    names = self.wanted_names
    if names is None:
      # no explicit list was given, use every instance we hold a lock on
      names = self.acquired_locks[locking.LEVEL_INSTANCE]
      self.wanted_names = names

    self.wanted_instances = [self.cfg.GetInstanceInfo(iname)
                             for iname in names]

  def _EnsureChildSizes(self, disk):
    """Ensure children of the disk have the needed disk size.

    This is valid mainly for DRBD8 and fixes an issue where the
    children have smaller disk size.

    @param disk: an L{ganeti.objects.Disk} object
    @return: whether any size was changed, here or further down

    """
    if disk.dev_type != constants.LD_DRBD8:
      return False

    assert disk.children, "Empty children for DRBD8?"
    first_child = disk.children[0]
    too_small = first_child.size < disk.size
    if too_small:
      self.LogInfo("Child disk has size %d, parent %d, fixing",
                   first_child.size, disk.size)
      first_child.size = disk.size

    # and we recurse on this child only, not on the metadev
    return self._EnsureChildSizes(first_child) or too_small

  def Exec(self, feedback_fn):
    """Verify the size of cluster disks.

    @return: list of (instance name, disk index, new size) tuples, one
        for each correction made

    """
    # TODO: check child disks too
    # TODO: check differences in size between primary/secondary nodes
    # group (instance, disk index, disk) entries by primary node
    disks_by_node = {}
    for inst in self.wanted_instances:
      node_entries = disks_by_node.setdefault(inst.primary_node, [])
      for idx, disk in enumerate(inst.disks):
        node_entries.append((inst, idx, disk))

    changed = []
    for node, entries in disks_by_node.items():
      # the query is done on copies, so that setting the disk ID does not
      # touch the configuration objects
      disk_copies = [entry[2].Copy() for entry in entries]
      for disk_copy in disk_copies:
        self.cfg.SetDiskID(disk_copy, node)

      result = self.rpc.call_blockdev_getsize(node, disk_copies)
      if result.fail_msg:
        self.LogWarning("Failure in blockdev_getsize call to node"
                        " %s, ignoring", node)
        continue

      if len(result.payload) != len(entries):
        logging.warning("Invalid result from node %s: len(dksl)=%d,"
                        " result.payload=%s", node, len(entries),
                        result.payload)
        self.LogWarning("Invalid result from node %s, ignoring node results",
                        node)
        continue

      for (inst, idx, disk), reported in zip(entries, result.payload):
        if reported is None:
          self.LogWarning("Disk %d of instance %s did not return size"
                          " information, ignoring", idx, inst.name)
        elif not isinstance(reported, (int, long)):
          self.LogWarning("Disk %d of instance %s did not return valid"
                          " size information, ignoring", idx, inst.name)
        else:
          # presumably converts bytes to the unit used for the recorded
          # size (mebibytes) - TODO confirm
          actual = reported >> 20
          if actual != disk.size:
            self.LogInfo("Disk %d of instance %s has mismatched size,"
                         " correcting: recorded %d, actual %d", idx,
                         inst.name, disk.size, actual)
            disk.size = actual
            self.cfg.Update(inst, feedback_fn)
            changed.append((inst.name, idx, actual))
          if self._EnsureChildSizes(disk):
            self.cfg.Update(inst, feedback_fn)
            changed.append((inst.name, idx, disk.size))

    return changed
2761
2762
class LUClusterRename(LogicalUnit):
  """Rename the cluster.

  """
  HPATH = "cluster-rename"
  HTYPE = constants.HTYPE_CLUSTER

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {
      "OP_TARGET": self.cfg.GetClusterName(),
      "NEW_NAME": self.op.name,
      }
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    """
    pre_nodes = [self.cfg.GetMasterNode()]
    post_nodes = self.cfg.GetNodeList()
    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Verify that the passed name is a valid one.

    The name is resolved first; the operation is refused if neither the
    name nor the IP address changes, or if a changed IP address already
    answers on the node daemon port.

    """
    family = self.cfg.GetPrimaryIPFamily()
    resolved = netutils.GetHostname(name=self.op.name, family=family)

    new_name = resolved.name
    new_ip = resolved.ip
    self.ip = new_ip

    name_changed = new_name != self.cfg.GetClusterName()
    ip_changed = new_ip != self.cfg.GetMasterIP()

    if not (name_changed or ip_changed):
      raise errors.OpPrereqError("Neither the name nor the IP address of the"
                                 " cluster has changed",
                                 errors.ECODE_INVAL)

    if ip_changed and netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("The given cluster IP address (%s) is"
                                 " reachable on the network" %
                                 new_ip, errors.ECODE_NOTUNIQUE)

    # from now on work with the resolved name
    self.op.name = new_name

  def Exec(self, feedback_fn):
    """Rename the cluster.

    @return: the new cluster name

    """
    new_name = self.op.name
    new_ip = self.ip

    # shutdown the master IP
    master = self.cfg.GetMasterNode()
    stop_result = self.rpc.call_node_stop_master(master, False)
    stop_result.Raise("Could not disable the master role")

    try:
      cluster = self.cfg.GetClusterInfo()
      cluster.cluster_name = new_name
      cluster.master_ip = new_ip
      self.cfg.Update(cluster, feedback_fn)

      # update the known hosts file
      ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
      # the file is uploaded to all online nodes except the master
      targets = self.cfg.GetOnlineNodeList()
      if master in targets:
        targets.remove(master)
      _UploadHelper(self, targets, constants.SSH_KNOWN_HOSTS_FILE)
    finally:
      # the master role is restarted even if the rename failed midway
      start_result = self.rpc.call_node_start_master(master, False, False)
      start_msg = start_result.fail_msg
      if start_msg:
        self.LogWarning("Could not re-enable the master role on"
                        " the master, please restart manually: %s",
                        start_msg)

    return new_name
2842
2843
2844 class LUClusterSetParams(LogicalUnit):
2845   """Change the parameters of the cluster.
2846
2847   """
2848   HPATH = "cluster-modify"
2849   HTYPE = constants.HTYPE_CLUSTER
2850   REQ_BGL = False
2851
2852   def CheckArguments(self):
2853     """Check parameters
2854
2855     """
2856     if self.op.uid_pool:
2857       uidpool.CheckUidPool(self.op.uid_pool)
2858
2859     if self.op.add_uids:
2860       uidpool.CheckUidPool(self.op.add_uids)
2861
2862     if self.op.remove_uids:
2863       uidpool.CheckUidPool(self.op.remove_uids)
2864
2865   def ExpandNames(self):
2866     # FIXME: in the future maybe other cluster params won't require checking on
2867     # all nodes to be modified.
2868     self.needed_locks = {
2869       locking.LEVEL_NODE: locking.ALL_SET,
2870     }
2871     self.share_locks[locking.LEVEL_NODE] = 1
2872
2873   def BuildHooksEnv(self):
2874     """Build hooks env.
2875
2876     """
2877     return {
2878       "OP_TARGET": self.cfg.GetClusterName(),
2879       "NEW_VG_NAME": self.op.vg_name,
2880       }
2881
2882   def BuildHooksNodes(self):
2883     """Build hooks nodes.
2884
2885     """
2886     mn = self.cfg.GetMasterNode()
2887     return ([mn], [mn])
2888
2889   def CheckPrereq(self):
2890     """Check prerequisites.
2891
2892     This checks whether the given params don't conflict and
2893     if the given volume group is valid.
2894
2895     """
2896     if self.op.vg_name is not None and not self.op.vg_name:
2897       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
2898         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
2899                                    " instances exist", errors.ECODE_INVAL)
2900
2901     if self.op.drbd_helper is not None and not self.op.drbd_helper:
2902       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
2903         raise errors.OpPrereqError("Cannot disable drbd helper while"
2904                                    " drbd-based instances exist",
2905                                    errors.ECODE_INVAL)
2906
2907     node_list = self.acquired_locks[locking.LEVEL_NODE]
2908
2909     # if vg_name not None, checks given volume group on all nodes
2910     if self.op.vg_name:
2911       vglist = self.rpc.call_vg_list(node_list)
2912       for node in node_list:
2913         msg = vglist[node].fail_msg
2914         if msg:
2915           # ignoring down node
2916           self.LogWarning("Error while gathering data on node %s"
2917                           " (ignoring node): %s", node, msg)
2918           continue
2919         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
2920                                               self.op.vg_name,
2921                                               constants.MIN_VG_SIZE)
2922         if vgstatus:
2923           raise errors.OpPrereqError("Error on node '%s': %s" %
2924                                      (node, vgstatus), errors.ECODE_ENVIRON)
2925
2926     if self.op.drbd_helper:
2927       # checks given drbd helper on all nodes
2928       helpers = self.rpc.call_drbd_helper(node_list)
2929       for node in node_list:
2930         ninfo = self.cfg.GetNodeInfo(node)
2931         if ninfo.offline:
2932           self.LogInfo("Not checking drbd helper on offline node %s", node)
2933           continue
2934         msg = helpers[node].fail_msg
2935         if msg:
2936           raise errors.OpPrereqError("Error checking drbd helper on node"
2937                                      " '%s': %s" % (node, msg),
2938                                      errors.ECODE_ENVIRON)
2939         node_helper = helpers[node].payload
2940         if node_helper != self.op.drbd_helper:
2941           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
2942                                      (node, node_helper), errors.ECODE_ENVIRON)
2943
2944     self.cluster = cluster = self.cfg.GetClusterInfo()
2945     # validate params changes
2946     if self.op.beparams:
2947       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
2948       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
2949
2950     if self.op.ndparams:
2951       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
2952       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
2953
2954       # TODO: we need a more general way to handle resetting
2955       # cluster-level parameters to default values
2956       if self.new_ndparams["oob_program"] == "":
2957         self.new_ndparams["oob_program"] = \
2958             constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
2959
2960     if self.op.nicparams:
2961       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
2962       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
2963       objects.NIC.CheckParameterSyntax(self.new_nicparams)
2964       nic_errors = []
2965
2966       # check all instances for consistency
2967       for instance in self.cfg.GetAllInstancesInfo().values():
2968         for nic_idx, nic in enumerate(instance.nics):
2969           params_copy = copy.deepcopy(nic.nicparams)
2970           params_filled = objects.FillDict(self.new_nicparams, params_copy)
2971
2972           # check parameter syntax
2973           try:
2974             objects.NIC.CheckParameterSyntax(params_filled)
2975           except errors.ConfigurationError, err:
2976             nic_errors.append("Instance %s, nic/%d: %s" %
2977                               (instance.name, nic_idx, err))
2978
2979           # if we're moving instances to routed, check that they have an ip
2980           target_mode = params_filled[constants.NIC_MODE]
2981           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
2982             nic_errors.append("Instance %s, nic/%d: routed nick with no ip" %
2983                               (instance.name, nic_idx))
2984       if nic_errors:
2985         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
2986                                    "\n".join(nic_errors))
2987
2988     # hypervisor list/parameters
2989     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
2990     if self.op.hvparams:
2991       for hv_name, hv_dict in self.op.hvparams.items():
2992         if hv_name not in self.new_hvparams:
2993           self.new_hvparams[hv_name] = hv_dict
2994         else:
2995           self.new_hvparams[hv_name].update(hv_dict)
2996
2997     # os hypervisor parameters
2998     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
2999     if self.op.os_hvp:
3000       for os_name, hvs in self.op.os_hvp.items():
3001         if os_name not in self.new_os_hvp:
3002           self.new_os_hvp[os_name] = hvs
3003         else:
3004           for hv_name, hv_dict in hvs.items():
3005             if hv_name not in self.new_os_hvp[os_name]:
3006               self.new_os_hvp[os_name][hv_name] = hv_dict
3007             else:
3008               self.new_os_hvp[os_name][hv_name].update(hv_dict)
3009
3010     # os parameters
3011     self.new_osp = objects.FillDict(cluster.osparams, {})
3012     if self.op.osparams:
3013       for os_name, osp in self.op.osparams.items():
3014         if os_name not in self.new_osp:
3015           self.new_osp[os_name] = {}
3016
3017         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3018                                                   use_none=True)
3019
3020         if not self.new_osp[os_name]:
3021           # we removed all parameters
3022           del self.new_osp[os_name]
3023         else:
3024           # check the parameter validity (remote check)
3025           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3026                          os_name, self.new_osp[os_name])
3027
3028     # changes to the hypervisor list
3029     if self.op.enabled_hypervisors is not None:
3030       self.hv_list = self.op.enabled_hypervisors
3031       for hv in self.hv_list:
3032         # if the hypervisor doesn't already exist in the cluster
3033         # hvparams, we initialize it to empty, and then (in both
3034         # cases) we make sure to fill the defaults, as we might not
3035         # have a complete defaults list if the hypervisor wasn't
3036         # enabled before
3037         if hv not in new_hvp:
3038           new_hvp[hv] = {}
3039         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3040         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3041     else:
3042       self.hv_list = cluster.enabled_hypervisors
3043
3044     if self.op.hvparams or self.op.enabled_hypervisors is not None:
3045       # either the enabled list has changed, or the parameters have, validate
3046       for hv_name, hv_params in self.new_hvparams.items():
3047         if ((self.op.hvparams and hv_name in self.op.hvparams) or
3048             (self.op.enabled_hypervisors and
3049              hv_name in self.op.enabled_hypervisors)):
3050           # either this is a new hypervisor, or its parameters have changed
3051           hv_class = hypervisor.GetHypervisor(hv_name)
3052           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3053           hv_class.CheckParameterSyntax(hv_params)
3054           _CheckHVParams(self, node_list, hv_name, hv_params)
3055
3056     if self.op.os_hvp:
3057       # no need to check any newly-enabled hypervisors, since the
3058       # defaults have already been checked in the above code-block
3059       for os_name, os_hvp in self.new_os_hvp.items():
3060         for hv_name, hv_params in os_hvp.items():
3061           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3062           # we need to fill in the new os_hvp on top of the actual hv_p
3063           cluster_defaults = self.new_hvparams.get(hv_name, {})
3064           new_osp = objects.FillDict(cluster_defaults, hv_params)
3065           hv_class = hypervisor.GetHypervisor(hv_name)
3066           hv_class.CheckParameterSyntax(new_osp)
3067           _CheckHVParams(self, node_list, hv_name, new_osp)
3068
3069     if self.op.default_iallocator:
3070       alloc_script = utils.FindFile(self.op.default_iallocator,
3071                                     constants.IALLOCATOR_SEARCH_PATH,
3072                                     os.path.isfile)
3073       if alloc_script is None:
3074         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3075                                    " specified" % self.op.default_iallocator,
3076                                    errors.ECODE_INVAL)
3077
3078   def Exec(self, feedback_fn):
3079     """Change the parameters of the cluster.
3080
3081     """
3082     if self.op.vg_name is not None:
3083       new_volume = self.op.vg_name
3084       if not new_volume:
3085         new_volume = None
3086       if new_volume != self.cfg.GetVGName():
3087         self.cfg.SetVGName(new_volume)
3088       else:
3089         feedback_fn("Cluster LVM configuration already in desired"
3090                     " state, not changing")
3091     if self.op.drbd_helper is not None:
3092       new_helper = self.op.drbd_helper
3093       if not new_helper:
3094         new_helper = None
3095       if new_helper != self.cfg.GetDRBDHelper():
3096         self.cfg.SetDRBDHelper(new_helper)
3097       else:
3098         feedback_fn("Cluster DRBD helper already in desired state,"
3099                     " not changing")
3100     if self.op.hvparams:
3101       self.cluster.hvparams = self.new_hvparams
3102     if self.op.os_hvp:
3103       self.cluster.os_hvp = self.new_os_hvp
3104     if self.op.enabled_hypervisors is not None:
3105       self.cluster.hvparams = self.new_hvparams
3106       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3107     if self.op.beparams:
3108       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3109     if self.op.nicparams:
3110       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3111     if self.op.osparams:
3112       self.cluster.osparams = self.new_osp
3113     if self.op.ndparams:
3114       self.cluster.ndparams = self.new_ndparams
3115
3116     if self.op.candidate_pool_size is not None:
3117       self.cluster.candidate_pool_size = self.op.candidate_pool_size
3118       # we need to update the pool size here, otherwise the save will fail
3119       _AdjustCandidatePool(self, [])
3120
3121     if self.op.maintain_node_health is not None:
3122       self.cluster.maintain_node_health = self.op.maintain_node_health
3123
3124     if self.op.prealloc_wipe_disks is not None:
3125       self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3126
3127     if self.op.add_uids is not None:
3128       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3129
3130     if self.op.remove_uids is not None:
3131       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3132
3133     if self.op.uid_pool is not None:
3134       self.cluster.uid_pool = self.op.uid_pool
3135
3136     if self.op.default_iallocator is not None:
3137       self.cluster.default_iallocator = self.op.default_iallocator
3138
3139     if self.op.reserved_lvs is not None:
3140       self.cluster.reserved_lvs = self.op.reserved_lvs
3141
3142     def helper_os(aname, mods, desc):
3143       desc += " OS list"
3144       lst = getattr(self.cluster, aname)
3145       for key, val in mods:
3146         if key == constants.DDM_ADD:
3147           if val in lst:
3148             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3149           else:
3150             lst.append(val)
3151         elif key == constants.DDM_REMOVE:
3152           if val in lst:
3153             lst.remove(val)
3154           else:
3155             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3156         else:
3157           raise errors.ProgrammerError("Invalid modification '%s'" % key)
3158
3159     if self.op.hidden_os:
3160       helper_os("hidden_os", self.op.hidden_os, "hidden")
3161
3162     if self.op.blacklisted_os:
3163       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3164
3165     if self.op.master_netdev:
3166       master = self.cfg.GetMasterNode()
3167       feedback_fn("Shutting down master ip on the current netdev (%s)" %
3168                   self.cluster.master_netdev)
3169       result = self.rpc.call_node_stop_master(master, False)
3170       result.Raise("Could not disable the master ip")
3171       feedback_fn("Changing master_netdev from %s to %s" %
3172                   (self.cluster.master_netdev, self.op.master_netdev))
3173       self.cluster.master_netdev = self.op.master_netdev
3174
3175     self.cfg.Update(self.cluster, feedback_fn)
3176
3177     if self.op.master_netdev:
3178       feedback_fn("Starting the master ip on the new master netdev (%s)" %
3179                   self.op.master_netdev)
3180       result = self.rpc.call_node_start_master(master, False, False)
3181       if result.fail_msg:
3182         self.LogWarning("Could not re-enable the master ip on"
3183                         " the master, please restart manually: %s",
3184                         result.fail_msg)
3185
3186
3187 def _UploadHelper(lu, nodes, fname):
3188   """Helper for uploading a file and showing warnings.
3189
3190   """
3191   if os.path.exists(fname):
3192     result = lu.rpc.call_upload_file(nodes, fname)
3193     for to_node, to_result in result.items():
3194       msg = to_result.fail_msg
3195       if msg:
3196         msg = ("Copy of file %s to node %s failed: %s" %
3197                (fname, to_node, msg))
3198         lu.proc.LogWarning(msg)
3199
3200
def _ComputeAncillaryFiles(cluster, redist):
  """Compute files external to Ganeti which need to be consistent.

  @param cluster: the cluster object; its C{modify_etc_hosts} and
      C{enabled_hypervisors} attributes are used
  @type redist: boolean
  @param redist: Whether to include files which need to be redistributed
  @return: tuple of four sets of file names: files for all nodes, files
      which must be either on all nodes or on none, files for master
      candidates only and files for VM-capable nodes only

  """
  # Files for all nodes
  files_all = set()
  files_all.add(constants.SSH_KNOWN_HOSTS_FILE)
  files_all.add(constants.CONFD_HMAC_KEY)
  files_all.add(constants.CLUSTER_DOMAIN_SECRET_FILE)

  # Files which should only be on master candidates
  files_mc = set()

  # These files are only listed when not computing the redistribution
  # lists
  if not redist:
    files_all.update(constants.ALL_CERT_FILES)
    files_all.update(ssconf.SimpleStore().GetFileList())
    files_mc.add(constants.CLUSTER_CONF_FILE)

  if cluster.modify_etc_hosts:
    files_all.add(constants.ETC_HOSTS)

  # Files which must either exist on all nodes or on none
  files_all_opt = set([constants.RAPI_USERS_FILE])

  # Files which should only be on VM-capable nodes
  files_vm = set()
  for hv_name in cluster.enabled_hypervisors:
    hv_class = hypervisor.GetHypervisor(hv_name)
    files_vm.update(hv_class.GetAncillaryFiles())

  # Filenames must be unique, i.e. the union must be as large as the sum
  # of the individual sets
  file_sets = (files_all, files_all_opt, files_mc, files_vm)
  assert (len(files_all | files_all_opt | files_mc | files_vm) ==
          sum([len(fset) for fset in file_sets])), \
         "Found file listed in more than one file list"

  return file_sets
3243
3244
def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
  """Distribute additional files which are part of the cluster configuration.

  ConfigWriter takes care of distributing the config and ssconf files, but
  there are more files which should be distributed to all nodes. This
  function uploads those to all online nodes (and the hypervisor-specific
  ones to the VM-capable nodes), always leaving out the master node.

  @param lu: calling logical unit
  @param additional_nodes: list of nodes not in the config to distribute to
  @type additional_vm: boolean
  @param additional_vm: whether the additional nodes are vm-capable or not

  """
  cfg = lu.cfg

  # Gather target nodes
  cluster = cfg.GetClusterInfo()
  master_info = cfg.GetNodeInfo(cfg.GetMasterNode())

  all_targets = cfg.GetOnlineNodeList()
  vm_targets = cfg.GetVmCapableNodeList()

  if additional_nodes is not None:
    all_targets.extend(additional_nodes)
    if additional_vm:
      vm_targets.extend(additional_nodes)

  # Never distribute to master node
  master_name = master_info.name
  for targets in (all_targets, vm_targets):
    if master_name in targets:
      targets.remove(master_name)

  # Gather file lists
  (files_all, files_all_opt, files_mc, files_vm) = \
    _ComputeAncillaryFiles(cluster, True)

  # Never re-distribute configuration file from here
  assert (constants.CLUSTER_CONF_FILE not in files_all and
          constants.CLUSTER_CONF_FILE not in files_vm)
  assert not files_mc, "Master candidates not handled in this function"

  # Upload the files, one category after the other
  uploads = [
    (all_targets, files_all),
    (all_targets, files_all_opt),
    (vm_targets, files_vm),
    ]
  for (targets, fnames) in uploads:
    for fname in fnames:
      _UploadHelper(lu, targets, fname)
3294
3295
class LUClusterRedistConf(NoHooksLU):
  """Force the redistribution of cluster configuration.

  The LU only re-submits the cluster object to the configuration and
  re-uploads the ancillary files.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Request the locks of all nodes, flagged as shared.

    """
    self.share_locks[locking.LEVEL_NODE] = 1
    self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}

  def Exec(self, feedback_fn):
    """Redistribute the configuration.

    @param feedback_fn: function passed on to the configuration update

    """
    cluster = self.cfg.GetClusterInfo()
    self.cfg.Update(cluster, feedback_fn)
    _RedistributeAncillaryFiles(self)
3316
3317
3318 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3319   """Sleep and poll for an instance's disk to sync.
3320
3321   """
3322   if not instance.disks or disks is not None and not disks:
3323     return True
3324
3325   disks = _ExpandCheckDisks(instance, disks)
3326
3327   if not oneshot:
3328     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3329
3330   node = instance.primary_node
3331
3332   for dev in disks:
3333     lu.cfg.SetDiskID(dev, node)
3334
3335   # TODO: Convert to utils.Retry
3336
3337   retries = 0
3338   degr_retries = 10 # in seconds, as we sleep 1 second each time
3339   while True:
3340     max_time = 0
3341     done = True
3342     cumul_degraded = False
3343     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3344     msg = rstats.fail_msg
3345     if msg:
3346       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3347       retries += 1
3348       if retries >= 10:
3349         raise errors.RemoteError("Can't contact node %s for mirror data,"
3350                                  " aborting." % node)
3351       time.sleep(6)
3352       continue
3353     rstats = rstats.payload
3354     retries = 0
3355     for i, mstat in enumerate(rstats):
3356       if mstat is None:
3357         lu.LogWarning("Can't compute data for node %s/%s",
3358                            node, disks[i].iv_name)
3359         continue
3360
3361       cumul_degraded = (cumul_degraded or
3362                         (mstat.is_degraded and mstat.sync_percent is None))
3363       if mstat.sync_percent is not None:
3364         done = False
3365         if mstat.estimated_time is not None:
3366           rem_time = ("%s remaining (estimated)" %
3367                       utils.FormatSeconds(mstat.estimated_time))
3368           max_time = mstat.estimated_time
3369         else:
3370           rem_time = "no time estimate"
3371         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3372                         (disks[i].iv_name, mstat.sync_percent, rem_time))
3373
3374     # if we're done but degraded, let's do a few small retries, to
3375     # make sure we see a stable and not transient situation; therefore
3376     # we force restart of the loop
3377     if (done or oneshot) and cumul_degraded and degr_retries > 0:
3378       logging.info("Degraded disks found, %d retries left", degr_retries)
3379       degr_retries -= 1
3380       time.sleep(1)
3381       continue
3382
3383     if done or oneshot:
3384       break
3385
3386     time.sleep(min(60, max_time))
3387
3388   if done:
3389     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3390   return not cumul_degraded
3391
3392
3393 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3394   """Check that mirrors are not degraded.
3395
3396   The ldisk parameter, if True, will change the test from the
3397   is_degraded attribute (which represents overall non-ok status for
3398   the device(s)) to the ldisk (representing the local storage status).
3399
3400   """
3401   lu.cfg.SetDiskID(dev, node)
3402
3403   result = True
3404
3405   if on_primary or dev.AssembleOnSecondary():
3406     rstats = lu.rpc.call_blockdev_find(node, dev)
3407     msg = rstats.fail_msg
3408     if msg:
3409       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3410       result = False
3411     elif not rstats.payload:
3412       lu.LogWarning("Can't find disk on node %s", node)
3413       result = False
3414     else:
3415       if ldisk:
3416         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3417       else:
3418         result = result and not rstats.payload.is_degraded
3419
3420   if dev.children:
3421     for child in dev.children:
3422       result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3423
3424   return result
3425
3426
3427 class LUOobCommand(NoHooksLU):
3428   """Logical unit for OOB handling.
3429
3430   """
3431   REG_BGL = False
3432   _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3433
3434   def CheckPrereq(self):
3435     """Check prerequisites.
3436
3437     This checks:
3438      - the node exists in the configuration
3439      - OOB is supported
3440
3441     Any errors are signaled by raising errors.OpPrereqError.
3442
3443     """
3444     self.nodes = []
3445     self.master_node = self.cfg.GetMasterNode()
3446
3447     assert self.op.power_delay >= 0.0
3448
3449     if self.op.node_names:
3450       if (self.op.command in self._SKIP_MASTER and
3451           self.master_node in self.op.node_names):
3452         master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3453         master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3454
3455         if master_oob_handler:
3456           additional_text = ("run '%s %s %s' if you want to operate on the"
3457                              " master regardless") % (master_oob_handler,
3458                                                       self.op.command,
3459                                                       self.master_node)
3460         else:
3461           additional_text = "it does not support out-of-band operations"
3462
3463         raise errors.OpPrereqError(("Operating on the master node %s is not"
3464                                     " allowed for %s; %s") %
3465                                    (self.master_node, self.op.command,
3466                                     additional_text), errors.ECODE_INVAL)
3467     else:
3468       self.op.node_names = self.cfg.GetNodeList()
3469       if self.op.command in self._SKIP_MASTER:
3470         self.op.node_names.remove(self.master_node)
3471
3472     if self.op.command in self._SKIP_MASTER:
3473       assert self.master_node not in self.op.node_names
3474
3475     for node_name in self.op.node_names:
3476       node = self.cfg.GetNodeInfo(node_name)
3477
3478       if node is None:
3479         raise errors.OpPrereqError("Node %s not found" % node_name,
3480                                    errors.ECODE_NOENT)
3481       else:
3482         self.nodes.append(node)
3483
3484       if (not self.op.ignore_status and
3485           (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3486         raise errors.OpPrereqError(("Cannot power off node %s because it is"
3487                                     " not marked offline") % node_name,
3488                                    errors.ECODE_STATE)
3489
3490   def ExpandNames(self):
3491     """Gather locks we need.
3492
3493     """
3494     if self.op.node_names:
3495       self.op.node_names = [_ExpandNodeName(self.cfg, name)
3496                             for name in self.op.node_names]
3497       lock_names = self.op.node_names
3498     else:
3499       lock_names = locking.ALL_SET
3500
3501     self.needed_locks = {
3502       locking.LEVEL_NODE: lock_names,
3503       }
3504
3505   def Exec(self, feedback_fn):
3506     """Execute OOB and return result if we expect any.
3507
3508     """
3509     master_node = self.master_node
3510     ret = []
3511
3512     for idx, node in enumerate(self.nodes):
3513       node_entry = [(constants.RS_NORMAL, node.name)]
3514       ret.append(node_entry)
3515
3516       oob_program = _SupportsOob(self.cfg, node)
3517
3518       if not oob_program:
3519         node_entry.append((constants.RS_UNAVAIL, None))
3520         continue
3521
3522       logging.info("Executing out-of-band command '%s' using '%s' on %s",
3523                    self.op.command, oob_program, node.name)
3524       result = self.rpc.call_run_oob(master_node, oob_program,
3525                                      self.op.command, node.name,
3526                                      self.op.timeout)
3527
3528       if result.fail_msg:
3529         self.LogWarning("Out-of-band RPC failed on node '%s': %s",
3530                         node.name, result.fail_msg)
3531         node_entry.append((constants.RS_NODATA, None))
3532       else:
3533         try:
3534           self._CheckPayload(result)
3535         except errors.OpExecError, err:
3536           self.LogWarning("Payload returned by node '%s' is not valid: %s",
3537                           node.name, err)
3538           node_entry.append((constants.RS_NODATA, None))
3539         else:
3540           if self.op.command == constants.OOB_HEALTH:
3541             # For health we should log important events
3542             for item, status in result.payload:
3543               if status in [constants.OOB_STATUS_WARNING,
3544                             constants.OOB_STATUS_CRITICAL]:
3545                 self.LogWarning("Item '%s' on node '%s' has status '%s'",
3546                                 item, node.name, status)
3547
3548           if self.op.command == constants.OOB_POWER_ON:
3549             node.powered = True
3550           elif self.op.command == constants.OOB_POWER_OFF:
3551             node.powered = False
3552           elif self.op.command == constants.OOB_POWER_STATUS:
3553             powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3554             if powered != node.powered:
3555               logging.warning(("Recorded power state (%s) of node '%s' does not"
3556                                " match actual power state (%s)"), node.powered,
3557                               node.name, powered)
3558
3559           # For configuration changing commands we should update the node
3560           if self.op.command in (constants.OOB_POWER_ON,
3561                                  constants.OOB_POWER_OFF):
3562             self.cfg.Update(node, feedback_fn)
3563
3564           node_entry.append((constants.RS_NORMAL, result.payload))
3565
3566           if (self.op.command == constants.OOB_POWER_ON and
3567               idx < len(self.nodes) - 1):
3568             time.sleep(self.op.power_delay)
3569
3570     return ret
3571
3572   def _CheckPayload(self, result):
3573     """Checks if the payload is valid.
3574
3575     @param result: RPC result
3576     @raises errors.OpExecError: If payload is not valid
3577
3578     """
3579     errs = []
3580     if self.op.command == constants.OOB_HEALTH:
3581       if not isinstance(result.payload, list):
3582         errs.append("command 'health' is expected to return a list but got %s" %
3583                     type(result.payload))
3584       else:
3585         for item, status in result.payload:
3586           if status not in constants.OOB_STATUSES:
3587             errs.append("health item '%s' has invalid status '%s'" %
3588                         (item, status))
3589
3590     if self.op.command == constants.OOB_POWER_STATUS:
3591       if not isinstance(result.payload, dict):
3592         errs.append("power-status is expected to return a dict but got %s" %
3593                     type(result.payload))
3594
3595     if self.op.command in [
3596         constants.OOB_POWER_ON,
3597         constants.OOB_POWER_OFF,
3598         constants.OOB_POWER_CYCLE,
3599         ]:
3600       if result.payload is not None:
3601         errs.append("%s is expected to not return payload but got '%s'" %
3602                     (self.op.command, result.payload))
3603
3604     if errs:
3605       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
3606                                utils.CommaJoin(errs))
3607
class _OsQuery(_QueryBase):
  """Query implementation for operating system definitions.

  """
  FIELDS = query.OS_FIELDS

  def ExpandNames(self, lu):
    """Set up the (empty) lock declaration and the wanted names.

    @param lu: calling logical unit

    """
    # Node locks (all nodes, shared) have been removed temporarily
    # TODO: reintroduce locks when they are lighter-weight
    lu.needed_locks = {}

    # The following variables interact with _QueryBase._GetNames
    self.wanted = self.names or locking.ALL_SET
    self.do_locking = self.use_locking

  def DeclareLocks(self, lu, level):
    """Nothing to do, as no locks are used.

    """
    pass

  @staticmethod
  def _DiagnoseByOS(rlist):
    """Remaps a per-node return list into a per-os per-node dictionary

    Nodes whose RPC failed do not show up in the result at all; nodes
    which answered, but do not have a given OS, get an empty list for it.

    @param rlist: a map with node names as keys and OS objects as values

    @rtype: dict
    @return: a dictionary with osnames as keys and as value another
        map, with nodes as keys and lists of tuples of (path, status,
        diagnose, variants, parameters, api_versions) as values, eg::

          {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], [], []),
                                     (/srv/..., False, "invalid api",
                                      [], [], [])],
                           "node2": [(/srv/..., True, "", [], [], [])]}
          }

    """
    # Nodes which didn't fail at RPC level; a non-responding node daemon
    # must not make all OSes invalid
    responsive = [name for (name, res) in rlist.items() if not res.fail_msg]

    by_os = {}
    for (node_name, node_res) in rlist.items():
      if node_res.fail_msg or not node_res.payload:
        continue
      for entry in node_res.payload:
        (os_name, path, status, diagnose, variants,
         params, api_versions) = entry
        per_node = by_os.get(os_name)
        if per_node is None:
          # first time this OS is seen: start with an empty list for
          # each responsive node
          per_node = by_os[os_name] = dict((name, []) for name in responsive)
        # params are converted from [name, help] to (name, help)
        per_node[node_name].append((path, status, diagnose, variants,
                                    [tuple(v) for v in params],
                                    api_versions))
    return by_os

  def _GetQueryData(self, lu):
    """Computes the list of OSes and their attributes.

    Only nodes which are not offline and are VM-capable are asked. An OS
    is valid only if the first entry of every node is present and has a
    true status; variants, parameters and API versions are reduced to the
    values common to all nodes inspected.

    @param lu: calling logical unit
    @return: list of C{query.OsInfo} objects, in the order given by
        C{_GetNames}

    """
    # Locking is not used
    assert not (lu.acquired_locks or self.do_locking or self.use_locking)

    targets = []
    for node in lu.cfg.GetAllNodesInfo().values():
      if not node.offline and node.vm_capable:
        targets.append(node.name)
    pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(targets))
    cluster = lu.cfg.GetClusterInfo()

    data = {}

    for (os_name, os_data) in pol.items():
      info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
                          hidden=(os_name in cluster.hidden_os),
                          blacklisted=(os_name in cluster.blacklisted_os))

      common_variants = set()
      common_params = set()
      common_api = set()

      for (idx, node_entries) in enumerate(os_data.values()):
        info.valid = bool(info.valid and node_entries and node_entries[0][1])
        if not info.valid:
          # whatever has been gathered so far is kept
          break

        (node_variants, node_params, node_api) = node_entries[0][3:6]
        if idx:
          # Filter out inconsistent values
          common_variants.intersection_update(node_variants)
          common_params.intersection_update(node_params)
          common_api.intersection_update(node_api)
        else:
          # First entry
          common_variants.update(node_variants)
          common_params.update(node_params)
          common_api.update(node_api)

      info.variants = list(common_variants)
      info.parameters = list(common_params)
      info.api_versions = list(common_api)

      data[os_name] = info

    # Prepare data in requested order
    ordered = []
    for name in self._GetNames(lu, pol.keys(), None):
      if name in data:
        ordered.append(data[name])
    return ordered
3720
3721
class LUOsDiagnose(NoHooksLU):
  """Logical unit for OS diagnose/query.

  All the work is delegated to an L{_OsQuery} object.

  """
  REQ_BGL = False

  @staticmethod
  def _BuildFilter(fields, names):
    """Builds a filter for querying OSes.

    @param fields: the requested output fields
    @param names: the OS names to restrict the query to
    @return: the name filter, the status filter, both combined with
        C{qlang.OP_AND}, or C{None} if neither of them applies

    """
    by_name = qlang.MakeSimpleFilter("name", names)

    # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
    # respective field is not requested
    conditions = []
    for fname in ["hidden", "blacklisted"]:
      if fname not in fields:
        conditions.append([qlang.OP_NOT, [qlang.OP_TRUE, fname]])
    if "valid" not in fields:
      conditions.append([qlang.OP_TRUE, "valid"])

    if conditions:
      by_status = [qlang.OP_AND] + conditions
    else:
      by_status = None

    if not by_name:
      return by_status
    if not by_status:
      return by_name
    return [qlang.OP_AND, by_name, by_status]

  def CheckArguments(self):
    """Create the query object; locking is never requested.

    """
    os_filter = self._BuildFilter(self.op.output_fields, self.op.names)
    self.oq = _OsQuery(os_filter, self.op.output_fields, False)

  def ExpandNames(self):
    """Let the query object declare the names and locks.

    """
    self.oq.ExpandNames(self)

  def Exec(self, feedback_fn):
    """Run the query and return its old-style result.

    """
    return self.oq.OldStyleQuery(self)
3764
3765
class LUNodeRemove(LogicalUnit):
  """Logical unit for removing a node.

  """
  HPATH = "node-remove"
  HTYPE = constants.HTYPE_NODE

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: dict
    @return: the node name, both as C{OP_TARGET} and as C{NODE_NAME}

    """
    node_name = self.op.node_name
    return {
      "OP_TARGET": node_name,
      "NODE_NAME": node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The node being removed is left out of both node lists; the hooks don't
    run on the target node in the pre phase as a failed node would then be
    impossible to remove.

    """
    remaining = self.cfg.GetNodeList()
    if self.op.node_name in remaining:
      remaining.remove(self.op.node_name)
    else:
      logging.warning("Node '%s', which is about to be removed, was not found"
                      " in the list of all nodes", self.op.node_name)
    return (remaining, remaining)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the node exists in the configuration
     - it does not have primary or secondary instances
     - it's not the master

    On success the node object is stored in C{self.node}.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    target = self.cfg.GetNodeInfo(self.op.node_name)
    assert target is not None

    instance_names = self.cfg.GetInstanceList()

    if target.name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Node is the master node, failover to another"
                                 " node is required", errors.ECODE_INVAL)

    for iname in instance_names:
      if target.name in self.cfg.GetInstanceInfo(iname).all_nodes:
        raise errors.OpPrereqError("Instance %s is still running on the node,"
                                   " please remove first" % iname,
                                   errors.ECODE_INVAL)

    self.op.node_name = target.name
    self.node = target

  def Exec(self, feedback_fn):
    """Removes the node from the cluster.

    @param feedback_fn: not used by this LU

    """
    target = self.node
    logging.info("Stopping the node daemon and removing configs from node %s",
                 target.name)

    # Read the setting before the node is dropped from the configuration
    modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup

    # Promote nodes to master candidate as needed
    _AdjustCandidatePool(self, exceptions=[target.name])
    self.context.RemoveNode(target.name)

    # Run post hooks on the node before it's removed
    _RunPostHook(self, target.name)

    leave_result = self.rpc.call_node_leave_cluster(target.name,
                                                    modify_ssh_setup)
    leave_error = leave_result.fail_msg
    if leave_error:
      # Not fatal, the node is gone from the configuration already
      self.LogWarning("Errors encountered on the remote node while leaving"
                      " the cluster: %s", leave_error)

    if not self.cfg.GetClusterInfo().modify_etc_hosts:
      return

    # Remove node from our /etc/hosts
    hosts_result = self.rpc.call_etc_hosts_modify(self.cfg.GetMasterNode(),
                                                  constants.ETC_HOSTS_REMOVE,
                                                  target.name, None)
    hosts_result.Raise("Can't update hosts file with new host data")
    _RedistributeAncillaryFiles(self)
3859
3860
class _NodeQuery(_QueryBase):
  """Query implementation for nodes.

  """
  FIELDS = query.NODE_FIELDS

  def ExpandNames(self, lu):
    """Compute the wanted nodes and declare the locks, if any.

    Node locks are only requested if locking was asked for and live data
    is part of the requested data.

    @param lu: calling logical unit

    """
    lu.needed_locks = {}
    lu.share_locks[locking.LEVEL_NODE] = 1

    if not self.names:
      self.wanted = locking.ALL_SET
    else:
      self.wanted = _GetWantedNodes(lu, self.names)

    self.do_locking = (self.use_locking and
                       query.NQ_LIVE in self.requested_data)

    if not self.do_locking:
      return

    # if we don't request only static fields, we need to lock the nodes
    lu.needed_locks[locking.LEVEL_NODE] = self.wanted

  def DeclareLocks(self, lu, level):
    """Nothing to do, all locks are declared in L{ExpandNames}.

    """
    pass

  def _GetQueryData(self, lu):
    """Computes the list of nodes and their attributes.

    Each kind of data (live data, instance lists, OOB support, node
    groups) is gathered only if it is part of C{self.requested_data}.

    @param lu: calling logical unit
    @return: a C{query.NodeQueryData} object

    """
    all_info = lu.cfg.GetAllNodesInfo()

    nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)

    # Gather data as requested
    live_data = None
    if query.NQ_LIVE in self.requested_data:
      # filter out non-vm_capable nodes
      toquery_nodes = []
      for name in nodenames:
        if all_info[name].vm_capable:
          toquery_nodes.append(name)

      node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
                                        lu.cfg.GetHypervisorType())
      # Failed or empty answers are left out
      live_data = {}
      for (name, nresult) in node_data.items():
        if not nresult.fail_msg and nresult.payload:
          live_data[name] = nresult.payload

    node_to_primary = None
    node_to_secondary = None
    if query.NQ_INST in self.requested_data:
      node_to_primary = dict((name, set()) for name in nodenames)
      node_to_secondary = dict((name, set()) for name in nodenames)

      # Instances on nodes which are not part of the query are ignored
      for inst in lu.cfg.GetAllInstancesInfo().values():
        if inst.primary_node in node_to_primary:
          node_to_primary[inst.primary_node].add(inst.name)
        for secnode in inst.secondary_nodes:
          if secnode in node_to_secondary:
            node_to_secondary[secnode].add(inst.name)

    oob_support = None
    if query.NQ_OOB in self.requested_data:
      oob_support = {}
      for (name, node) in all_info.iteritems():
        oob_support[name] = bool(_SupportsOob(lu.cfg, node))

    groups = {}
    if query.NQ_GROUP in self.requested_data:
      groups = lu.cfg.GetAllNodeGroupsInfo()

    wanted_nodes = [all_info[name] for name in nodenames]
    return query.NodeQueryData(wanted_nodes, live_data,
                               lu.cfg.GetMasterNode(),
                               node_to_primary, node_to_secondary, groups,
                               oob_support, lu.cfg.GetClusterInfo())
3935
3936
class LUNodeQuery(NoHooksLU):
  """Logical unit for querying nodes.

  All the work is delegated to a L{_NodeQuery} object.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Create the query object from the opcode parameters.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.nq = _NodeQuery(name_filter, self.op.output_fields,
                         self.op.use_locking)

  def ExpandNames(self):
    """Let the query object declare the names and locks.

    """
    self.nq.ExpandNames(self)

  def Exec(self, feedback_fn):
    """Run the query and return its old-style result.

    """
    return self.nq.OldStyleQuery(self)
3953
3954
class LUNodeQueryvols(NoHooksLU):
  """Logical unit for getting volumes on node(s).

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
  _FIELDS_STATIC = utils.FieldSet("node")

  def CheckArguments(self):
    """Validate the requested output fields.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Request locks, flagged as shared, for the wanted (or all) nodes.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1
    if self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)
    else:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET

  def Exec(self, feedback_fn):
    """Computes the list of volumes and their attributes.

    Offline nodes are skipped silently, nodes whose RPC failed with a
    warning.

    @rtype: list
    @return: one list per volume, holding the requested fields converted
        to strings; the volumes of a node are sorted by their C{dev} value

    """
    nodenames = self.acquired_locks[locking.LEVEL_NODE]
    volumes = self.rpc.call_node_volumes(nodenames)

    instances = [self.cfg.GetInstanceInfo(iname)
                 for iname in self.cfg.GetInstanceList()]

    # Logical volumes of each instance, grouped by node
    lv_by_node = dict((inst, inst.MapLVsByNode()) for inst in instances)

    rows = []
    for node in nodenames:
      nresult = volumes[node]
      if nresult.offline:
        continue
      failure = nresult.fail_msg
      if failure:
        self.LogWarning("Can't compute volume data on node %s: %s",
                        node, failure)
        continue

      for vol in sorted(nresult.payload[:], key=lambda entry: entry['dev']):
        row = []
        for field in self.op.output_fields:
          if field == "node":
            val = node
          elif field == "phys":
            val = vol['dev']
          elif field == "vg":
            val = vol['vg']
          elif field == "name":
            val = vol['name']
          elif field == "size":
            val = int(float(vol['size']))
          elif field == "instance":
            # Name of the first instance having this volume on this node
            val = '-'
            for inst in instances:
              node_lvs = lv_by_node[inst]
              if node in node_lvs and vol['name'] in node_lvs[node]:
                val = inst.name
                break
          else:
            raise errors.ParameterError(field)
          row.append(str(val))

        rows.append(row)

    return rows
4031
4032
class LUNodeQueryStorage(NoHooksLU):
  """Logical unit for getting information on storage units on node(s).

  """
  _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
  REQ_BGL = False

  def CheckArguments(self):
    """Validate the requested output fields.

    """
    dynamic_fields = utils.FieldSet(*constants.VALID_STORAGE_FIELDS)
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=dynamic_fields,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Request locks, flagged as shared, for the wanted (or all) nodes.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if not self.op.nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = \
        _GetWantedNodes(self, self.op.nodes)

  def Exec(self, feedback_fn):
    """Computes the list of storage units and their attributes.

    Offline nodes are skipped silently, nodes whose RPC failed with a
    warning.

    @rtype: list
    @return: one list per storage unit, holding the values of the
        requested fields; nodes and unit names are ordered using
        C{utils.NiceSort}

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]

    requested = self.op.output_fields

    # Always get name to sort by
    if constants.SF_NAME in requested:
      candidates = requested[:]
    else:
      candidates = [constants.SF_NAME] + requested

    # Never ask for node or type as it's only known to the LU
    lu_only = [constants.SF_NODE, constants.SF_TYPE]
    fields = [fname for fname in candidates if fname not in lu_only]

    # Position of each field in the rows returned by the nodes
    field_idx = {}
    for (idx, fname) in enumerate(fields):
      field_idx[fname] = idx
    name_idx = field_idx[constants.SF_NAME]

    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    data = self.rpc.call_storage_list(self.nodes,
                                      self.op.storage_type, st_args,
                                      self.op.name, fields)

    result = []

    for node in utils.NiceSort(self.nodes):
      nresult = data[node]
      if nresult.offline:
        continue

      failure = nresult.fail_msg
      if failure:
        self.LogWarning("Can't get storage data from node %s: %s",
                        node, failure)
        continue

      # Index the rows by unit name
      rows_by_name = {}
      for row in nresult.payload:
        rows_by_name[row[name_idx]] = row

      for unit_name in utils.NiceSort(rows_by_name.keys()):
        row = rows_by_name[unit_name]

        out = []

        for field in requested:
          if field == constants.SF_NODE:
            out.append(node)
          elif field == constants.SF_TYPE:
            out.append(self.op.storage_type)
          elif field in field_idx:
            out.append(row[field_idx[field]])
          else:
            raise errors.ParameterError(field)

        result.append(out)

    return result
4114
4115
class _InstanceQuery(_QueryBase):
  """Query implementation for instances.

  """
  FIELDS = query.INSTANCE_FIELDS

  def ExpandNames(self, lu):
    """Computes the wanted instances and declares the needed locks.

    Locks are only requested when locking was asked for and live data is
    part of the requested data.

    @param lu: the logical unit on whose behalf the query is run

    """
    lu.needed_locks = {}
    lu.share_locks[locking.LEVEL_INSTANCE] = 1
    lu.share_locks[locking.LEVEL_NODE] = 1

    if self.names:
      self.wanted = _GetWantedInstances(lu, self.names)
    else:
      self.wanted = locking.ALL_SET

    self.do_locking = (self.use_locking and
                       query.IQ_LIVE in self.requested_data)
    if self.do_locking:
      lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
      # The node locks are computed in DeclareLocks, once the instance
      # locks are known
      lu.needed_locks[locking.LEVEL_NODE] = []
      lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, lu, level):
    """Locks the nodes of the locked instances, if locking is in use.

    """
    if level == locking.LEVEL_NODE and self.do_locking:
      lu._LockInstancesNodes() # pylint: disable-msg=W0212

  def _GetQueryData(self, lu):
    """Computes the list of instances and their attributes.

    Live data, disk usage and console information are only gathered when
    the corresponding C{query.IQ_*} flag is in C{self.requested_data}.

    @param lu: the logical unit on whose behalf the query is run
    @return: a L{query.InstanceQueryData} object built from the gathered
        data

    """
    cluster = lu.cfg.GetClusterInfo()
    all_info = lu.cfg.GetAllInstancesInfo()

    instance_names = self._GetNames(lu, all_info.keys(),
                                    locking.LEVEL_INSTANCE)

    instance_list = [all_info[name] for name in instance_names]
    nodes = frozenset(itertools.chain(*(inst.all_nodes
                                        for inst in instance_list)))
    hv_list = list(set([inst.hypervisor for inst in instance_list]))
    bad_nodes = []
    offline_nodes = []
    wrongnode_inst = set()
    live_data = {}

    # Gather data as requested
    if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
      node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
      for name in nodes:
        result = node_data[name]
        if result.offline:
          # offline nodes will be in both lists
          assert result.fail_msg
          offline_nodes.append(name)
        if result.fail_msg:
          bad_nodes.append(name)
        elif result.payload:
          has_primary = False
          for inst in result.payload:
            if inst in all_info:
              if all_info[inst].primary_node == name:
                has_primary = True
              else:
                wrongnode_inst.add(inst)
            else:
              # orphan instance; we don't list it here as we don't
              # handle this case yet in the output of instance listing
              logging.warning("Orphan instance '%s' found on node %s",
                              inst, name)
          if has_primary:
            # Merge the node's payload once instead of once per primary
            # instance; the resulting dictionary is the same.
            # NOTE(review): the whole payload is merged, so instances
            # reported by this node which are not primary here (wrong-node
            # or orphan ones) also end up in live_data whenever the node
            # runs at least one of its own instances -- confirm that this
            # is intended.
            live_data.update(result.payload)
        # else no instance is alive

    if query.IQ_DISKUSAGE in self.requested_data:
      disk_usage = dict((inst.name,
                         _ComputeDiskSize(inst.disk_template,
                                          [{constants.IDISK_SIZE: disk.size}
                                           for disk in inst.disks]))
                        for inst in instance_list)
    else:
      disk_usage = None

    if query.IQ_CONSOLE in self.requested_data:
      consinfo = {}
      for inst in instance_list:
        if inst.name in live_data:
          # Instance is running
          consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
        else:
          consinfo[inst.name] = None
      assert set(consinfo.keys()) == set(instance_names)
    else:
      consinfo = None

    # Reuse the cluster object fetched above instead of querying the
    # configuration a second time
    return query.InstanceQueryData(instance_list, cluster,
                                   disk_usage, offline_nodes, bad_nodes,
                                   live_data, wrongnode_inst, consinfo)
4209
4210
class LUQuery(NoHooksLU):
  """Generic query LU, delegating to a per-resource implementation.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Instantiates the query implementation selected by C{self.op.what}.

    """
    impl_cls = _GetQueryImplementation(self.op.what)

    # NOTE(review): the third argument is presumably the "use locking"
    # flag of the query implementation -- confirm against _QueryBase
    self.impl = impl_cls(self.op.filter, self.op.fields, False)

  def ExpandNames(self):
    """Lets the query implementation declare the needed locks.

    """
    self.impl.ExpandNames(self)

  def DeclareLocks(self, level):
    """Forwards the lock declaration to the query implementation.

    """
    self.impl.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Runs the query and returns the implementation's result.

    """
    return self.impl.NewStyleQuery(self)
4231
4232
class LUQueryFields(NoHooksLU):
  """Query for the fields available for a certain kind of resource.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Looks up the query implementation selected by C{self.op.what}.

    """
    self.qcls = _GetQueryImplementation(self.op.what)

  def ExpandNames(self):
    """Declares that no locks are needed.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Returns the result of L{query.QueryFields} for the requested fields.

    """
    field_defs = self.qcls.FIELDS
    return query.QueryFields(field_defs, self.op.fields)
4248
4249
class LUNodeModifyStorage(NoHooksLU):
  """Logical unit for modifying a storage volume on a node.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Expands the node name and validates the requested changes.

    @raise errors.OpPrereqError: if the storage type has no modifiable
        fields at all, or if a requested field can not be modified for it

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    st_type = self.op.storage_type

    try:
      allowed = constants.MODIFIABLE_STORAGE_FIELDS[st_type]
    except KeyError:
      raise errors.OpPrereqError("Storage units of type '%s' can not be"
                                 " modified" % st_type,
                                 errors.ECODE_INVAL)

    unsupported = set(self.op.changes.keys()) - allowed
    if not unsupported:
      return

    raise errors.OpPrereqError("The following fields can not be modified for"
                               " storage units of type '%s': %r" %
                               (st_type, list(unsupported)),
                               errors.ECODE_INVAL)

  def ExpandNames(self):
    """Requests the lock of the target node.

    """
    self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

  def Exec(self, feedback_fn):
    """Applies the requested changes to the storage unit via RPC.

    """
    target_node = self.op.node_name
    st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
    rpc_result = self.rpc.call_storage_modify(target_node,
                                              self.op.storage_type, st_args,
                                              self.op.name, self.op.changes)
    rpc_result.Raise("Failed to modify storage unit '%s' on %s" %
                     (self.op.name, target_node))
4290
4291
class LUNodeAdd(LogicalUnit):
  """Logical unit for adding a node to the cluster, or re-adding one.

  @cvar _NFLAGS: names of the node capability flags which are copied from
      the opcode to the node object

  """
  HPATH = "node-add"
  HTYPE = constants.HTYPE_NODE
  _NFLAGS = ["master_capable", "vm_capable"]

  def CheckArguments(self):
    """Resolves the node name and rejects invalid re-add requests.

    The resolved hostname object is kept in C{self.hostname} and the
    opcode's node name is replaced by the resolved name.

    @raise errors.OpPrereqError: if the master node is being re-added or
        if a node group is passed together with a re-add

    """
    self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
    # validate/normalize the node name
    self.hostname = netutils.GetHostname(name=self.op.node_name,
                                         family=self.primary_ip_family)
    self.op.node_name = self.hostname.name

    if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
      raise errors.OpPrereqError("Cannot readd the master node",
                                 errors.ECODE_STATE)

    if self.op.readd and self.op.group:
      raise errors.OpPrereqError("Cannot pass a node group when a node is"
                                 " being readded", errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    This will run on all nodes before, and on all nodes + the new node after.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "NODE_NAME": self.op.node_name,
      "NODE_PIP": self.op.primary_ip,
      "NODE_SIP": self.op.secondary_ip,
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: (pre-hook nodes, post-hook nodes); the node being added is
        only part of the post-hook list

    """
    # Exclude added node
    pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
    post_nodes = pre_nodes + [self.op.node_name, ]

    return (pre_nodes, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks:
     - the new node is not already in the config (or, for a re-add, that
       it is)
     - it is resolvable
     - its IP addresses do not conflict with the ones of existing nodes
     - its parameters (single/dual homed) matches the cluster
     - it is reachable on the node daemon port

    As a side effect, the primary/secondary IP and the capability flags of
    the opcode are filled in, and C{self.new_node}, C{self.master_candidate}
    and C{self.changed_primary_ip} are computed for use in L{Exec}.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    cfg = self.cfg
    hostname = self.hostname
    node = hostname.name
    primary_ip = self.op.primary_ip = hostname.ip
    if self.op.secondary_ip is None:
      # Without an explicit secondary IP the primary one is reused, which
      # is not possible if the primary address is an IPv6 one
      if self.primary_ip_family == netutils.IP6Address.family:
        raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
                                   " IPv4 address must be given as secondary",
                                   errors.ECODE_INVAL)
      self.op.secondary_ip = primary_ip

    secondary_ip = self.op.secondary_ip
    if not netutils.IP4Address.IsValid(secondary_ip):
      raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                 " address" % secondary_ip, errors.ECODE_INVAL)

    node_list = cfg.GetNodeList()
    if not self.op.readd and node in node_list:
      raise errors.OpPrereqError("Node %s is already in the configuration" %
                                 node, errors.ECODE_EXISTS)
    elif self.op.readd and node not in node_list:
      raise errors.OpPrereqError("Node %s is not in the configuration" % node,
                                 errors.ECODE_NOENT)

    self.changed_primary_ip = False

    for existing_node_name in node_list:
      existing_node = cfg.GetNodeInfo(existing_node_name)

      if self.op.readd and node == existing_node_name:
        # The re-added node must keep its secondary IP; a changed primary
        # IP is only recorded here and applied in Exec
        if existing_node.secondary_ip != secondary_ip:
          raise errors.OpPrereqError("Readded node doesn't have the same IP"
                                     " address configuration as before",
                                     errors.ECODE_INVAL)
        if existing_node.primary_ip != primary_ip:
          self.changed_primary_ip = True

        continue

      if (existing_node.primary_ip == primary_ip or
          existing_node.secondary_ip == primary_ip or
          existing_node.primary_ip == secondary_ip or
          existing_node.secondary_ip == secondary_ip):
        raise errors.OpPrereqError("New node ip address(es) conflict with"
                                   " existing node %s" % existing_node.name,
                                   errors.ECODE_NOTUNIQUE)

    # After this 'if' block, None is no longer a valid value for the
    # _capable op attributes
    if self.op.readd:
      # unset flags are inherited from the existing node object
      old_node = self.cfg.GetNodeInfo(node)
      assert old_node is not None, "Can't retrieve locked node %s" % node
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, getattr(old_node, attr))
    else:
      # unset flags default to True for new nodes
      for attr in self._NFLAGS:
        if getattr(self.op, attr) is None:
          setattr(self.op, attr, True)

    if self.op.readd and not self.op.vm_capable:
      pri, sec = cfg.GetNodeInstances(node)
      if pri or sec:
        raise errors.OpPrereqError("Node %s being re-added with vm_capable"
                                   " flag set to false, but it already holds"
                                   " instances" % node,
                                   errors.ECODE_STATE)

    # check that the type of the node (single versus dual homed) is the
    # same as for the master
    myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
    master_singlehomed = myself.secondary_ip == myself.primary_ip
    newbie_singlehomed = secondary_ip == primary_ip
    if master_singlehomed != newbie_singlehomed:
      if master_singlehomed:
        raise errors.OpPrereqError("The master has no secondary ip but the"
                                   " new node has one",
                                   errors.ECODE_INVAL)
      else:
        raise errors.OpPrereqError("The master has a secondary ip but the"
                                   " new node doesn't have one",
                                   errors.ECODE_INVAL)

    # checks reachability
    if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
      raise errors.OpPrereqError("Node not reachable by ping",
                                 errors.ECODE_ENVIRON)

    if not newbie_singlehomed:
      # check reachability from my secondary ip to newbie's secondary ip
      if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
                           source=myself.secondary_ip):
        raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                   " based ping to node daemon port",
                                   errors.ECODE_ENVIRON)

    # A re-added node is passed as exception to the self-promotion decision
    if self.op.readd:
      exceptions = [node]
    else:
      exceptions = []

    if self.op.master_capable:
      self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
    else:
      self.master_candidate = False

    if self.op.readd:
      # re-adds reuse (and later modify) the existing node object
      self.new_node = old_node
    else:
      node_group = cfg.LookupNodeGroup(self.op.group)
      self.new_node = objects.Node(name=node,
                                   primary_ip=primary_ip,
                                   secondary_ip=secondary_ip,
                                   master_candidate=self.master_candidate,
                                   offline=False, drained=False,
                                   group=node_group)

    if self.op.ndparams:
      utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)

  def Exec(self, feedback_fn):
    """Adds the new node to the cluster.

    The node object computed in L{CheckPrereq} is updated, the node's
    protocol version and its ssh/hostname setup are verified, and the node
    is then added (or re-added) to the cluster context.

    @raise errors.OpExecError: on protocol version mismatch or if the
        ssh/hostname verification fails

    """
    new_node = self.new_node
    node = new_node.name

    # We are adding a new node, so we assume it's powered
    new_node.powered = True

    # for re-adds, reset the offline/drained/master-candidate flags;
    # we need to reset here, otherwise offline would prevent RPC calls
    # later in the procedure; this also means that if the re-add
    # fails, we are left with a non-offlined, broken node
    if self.op.readd:
      new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
      self.LogInfo("Readding a node, the offline/drained flags were reset")
      # if we demote the node, we do cleanup later in the procedure
      new_node.master_candidate = self.master_candidate
      if self.changed_primary_ip:
        new_node.primary_ip = self.op.primary_ip

    # copy the master/vm_capable flags
    for attr in self._NFLAGS:
      setattr(new_node, attr, getattr(self.op, attr))

    # notify the user about any possible mc promotion
    if new_node.master_candidate:
      self.LogInfo("Node will be a master candidate")

    # NOTE(review): when no ndparams are passed, the node's ndparams are
    # reset to an empty dict, also for re-added nodes -- confirm that
    # dropping the previous values on re-add is intended
    if self.op.ndparams:
      new_node.ndparams = self.op.ndparams
    else:
      new_node.ndparams = {}

    # check connectivity
    result = self.rpc.call_version([node])[node]
    result.Raise("Can't get version information from node %s" % node)
    if constants.PROTOCOL_VERSION == result.payload:
      logging.info("Communication to node %s fine, sw version %s match",
                   node, result.payload)
    else:
      raise errors.OpExecError("Version mismatch master version %s,"
                               " node version %s" %
                               (constants.PROTOCOL_VERSION, result.payload))

    # Add the node to the master's /etc/hosts, if the cluster is
    # configured to manage that file
    if self.cfg.GetClusterInfo().modify_etc_hosts:
      master_node = self.cfg.GetMasterNode()
      result = self.rpc.call_etc_hosts_modify(master_node,
                                              constants.ETC_HOSTS_ADD,
                                              self.hostname.name,
                                              self.hostname.ip)
      result.Raise("Can't update hosts file with new host data")

    if new_node.secondary_ip != new_node.primary_ip:
      _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
                               False)

    # Have the master verify the new node via the NV_NODELIST check
    node_verify_list = [self.cfg.GetMasterNode()]
    node_verify_param = {
      constants.NV_NODELIST: [node],
      # TODO: do a node-net-test as well?
    }

    result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
                                       self.cfg.GetClusterName())
    for verifier in node_verify_list:
      result[verifier].Raise("Cannot communicate with node %s" % verifier)
      nl_payload = result[verifier].payload[constants.NV_NODELIST]
      # a non-empty payload lists the nodes for which the check failed
      if nl_payload:
        for failed in nl_payload:
          feedback_fn("ssh/hostname verification failed"
                      " (checking from %s): %s" %
                      (verifier, nl_payload[failed]))
        raise errors.OpExecError("ssh/hostname verification failed")

    if self.op.readd:
      _RedistributeAncillaryFiles(self)
      self.context.ReaddNode(new_node)
      # make sure we redistribute the config
      self.cfg.Update(new_node, feedback_fn)
      # and make sure the new node will not have old files around
      if not new_node.master_candidate:
        result = self.rpc.call_node_demote_from_mc(new_node.name)
        msg = result.fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself from master"
                          " candidate status: %s" % msg)
    else:
      _RedistributeAncillaryFiles(self, additional_nodes=[node],
                                  additional_vm=self.op.vm_capable)
      self.context.AddNode(new_node, self.proc.GetECId())
4564
4565
class LUNodeSetParams(LogicalUnit):
  """Modifies the parameters of a node.

  @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
      to the node role (as _ROLE_*)
  @cvar _R2F: a dictionary from node role to tuples of flags
  @cvar _FLAGS: a list of attribute names corresponding to the flags

  """
  HPATH = "node-modify"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False
  (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
  _F2R = {
    (True, False, False): _ROLE_CANDIDATE,
    (False, True, False): _ROLE_DRAINED,
    (False, False, True): _ROLE_OFFLINE,
    (False, False, False): _ROLE_REGULAR,
    }
  _R2F = dict((v, k) for k, v in _F2R.items())
  _FLAGS = ["master_candidate", "drained", "offline"]

  def CheckArguments(self):
    """Validates the requested modifications.

    Besides the validation, this computes C{self.might_demote},
    C{self.lock_all} and C{self.lock_instances}, which drive the locking
    done in L{ExpandNames} and L{DeclareLocks}.

    @raise errors.OpPrereqError: if no modification was passed, if more
        than one value is set to True, or if the secondary IP is not a
        valid IPv4 address

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
    all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
                self.op.master_capable, self.op.vm_capable,
                self.op.secondary_ip, self.op.ndparams]
    if all_mods.count(None) == len(all_mods):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)
    if all_mods.count(True) > 1:
      raise errors.OpPrereqError("Can't set the node into more than one"
                                 " state at the same time",
                                 errors.ECODE_INVAL)

    # Boolean value that tells us whether we might be demoting from MC
    self.might_demote = (self.op.master_candidate == False or
                         self.op.offline == True or
                         self.op.drained == True or
                         self.op.master_capable == False)

    if self.op.secondary_ip:
      if not netutils.IP4Address.IsValid(self.op.secondary_ip):
        raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
                                   " address" % self.op.secondary_ip,
                                   errors.ECODE_INVAL)

    self.lock_all = self.op.auto_promote and self.might_demote
    self.lock_instances = self.op.secondary_ip is not None

  def ExpandNames(self):
    """Requests the node lock(s) and, if needed, all instance locks.

    All nodes are locked if a demotion with auto-promotion might happen;
    all instances are locked if the secondary IP is being changed.

    """
    if self.lock_all:
      self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
    else:
      self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}

    if self.lock_instances:
      self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET

  def DeclareLocks(self, level):
    """Releases the locks of instances unrelated to the modified node.

    This also computes C{self.affected_instances}, the internally mirrored
    instances which use the node.

    """
    # If we have locked all instances, before waiting to lock nodes, release
    # all the ones living on nodes unrelated to the current operation.
    if level == locking.LEVEL_NODE and self.lock_instances:
      self.affected_instances = []
      # NOTE(review): when all nodes are locked, no instance lock is
      # released and affected_instances stays empty, so the instance
      # checks in CheckPrereq are skipped -- confirm that this is intended
      if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
        instances_keep = []

        # Build list of instances to release
        for instance_name in self.acquired_locks[locking.LEVEL_INSTANCE]:
          instance = self.context.cfg.GetInstanceInfo(instance_name)
          if (instance.disk_template in constants.DTS_INT_MIRROR and
              self.op.node_name in instance.all_nodes):
            instances_keep.append(instance_name)
            self.affected_instances.append(instance)

        _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)

        assert (set(self.acquired_locks.get(locking.LEVEL_INSTANCE, [])) ==
                set(instances_keep))

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on the master node.

    """
    return {
      "OP_TARGET": self.op.node_name,
      "MASTER_CANDIDATE": str(self.op.master_candidate),
      "OFFLINE": str(self.op.offline),
      "DRAINED": str(self.op.drained),
      "MASTER_CAPABLE": str(self.op.master_capable),
      "VM_CAPABLE": str(self.op.vm_capable),
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: the master node and the modified node, for both the pre and
        the post hooks

    """
    nl = [self.cfg.GetMasterNode(), self.op.node_name]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the requested flag, capability, power state and
    secondary IP changes are allowed for the node in its current state,
    and computes C{self.old_flags}, C{self.old_role} and C{self.new_role}
    (plus C{self.new_ndparams}, if node parameters are being changed) for
    use in L{Exec}.

    Any errors are signaled by raising errors.OpPrereqError.

    """
    node = self.node = self.cfg.GetNodeInfo(self.op.node_name)

    if (self.op.master_candidate is not None or
        self.op.drained is not None or
        self.op.offline is not None):
      # we can't change the master's node flags
      if self.op.node_name == self.cfg.GetMasterNode():
        raise errors.OpPrereqError("The master role can be changed"
                                   " only via master-failover",
                                   errors.ECODE_INVAL)

    if self.op.master_candidate and not node.master_capable:
      raise errors.OpPrereqError("Node %s is not master capable, cannot make"
                                 " it a master candidate" % node.name,
                                 errors.ECODE_STATE)

    if self.op.vm_capable == False:
      (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
      if ipri or isec:
        raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
                                   " the vm_capable flag" % node.name,
                                   errors.ECODE_STATE)

    if node.master_candidate and self.might_demote and not self.lock_all:
      assert not self.op.auto_promote, "auto_promote set but lock_all not"
      # check if after removing the current node, we're missing master
      # candidates
      (mc_remaining, mc_should, _) = \
          self.cfg.GetMasterCandidateStats(exceptions=[node.name])
      if mc_remaining < mc_should:
        raise errors.OpPrereqError("Not enough master candidates, please"
                                   " pass auto promote option to allow"
                                   " promotion", errors.ECODE_STATE)

    self.old_flags = old_flags = (node.master_candidate,
                                  node.drained, node.offline)
    assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
    self.old_role = old_role = self._F2R[old_flags]

    # Check for ineffective changes
    for attr in self._FLAGS:
      if (getattr(self.op, attr) == False and getattr(node, attr) == False):
        self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
        setattr(self.op, attr, None)

    # Past this point, any flag change to False means a transition
    # away from the respective state, as only real changes are kept

    # TODO: We might query the real power state if it supports OOB
    if _SupportsOob(self.cfg, node):
      if self.op.offline is False and not (node.powered or
                                           self.op.powered == True):
        raise errors.OpPrereqError(("Node %s needs to be turned on before its"
                                    " offline status can be reset") %
                                   self.op.node_name, errors.ECODE_STATE)
    elif self.op.powered is not None:
      raise errors.OpPrereqError(("Unable to change powered state for node %s"
                                  " as it does not support out-of-band"
                                  " handling") % self.op.node_name,
                                 errors.ECODE_STATE)

    # If we're being deofflined/drained, we'll MC ourself if needed
    if (self.op.drained == False or self.op.offline == False or
        (self.op.master_capable and not node.master_capable)):
      if _DecideSelfPromotion(self):
        self.op.master_candidate = True
        self.LogInfo("Auto-promoting node to master candidate")

    # If we're no longer master capable, we'll demote ourselves from MC
    if self.op.master_capable == False and node.master_candidate:
      self.LogInfo("Demoting from master candidate")
      self.op.master_candidate = False

    # Compute new role
    assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
    if self.op.master_candidate:
      new_role = self._ROLE_CANDIDATE
    elif self.op.drained:
      new_role = self._ROLE_DRAINED
    elif self.op.offline:
      new_role = self._ROLE_OFFLINE
    elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
      # False is still in new flags, which means we're un-setting (the
      # only) True flag
      new_role = self._ROLE_REGULAR
    else: # no new flags, nothing, keep old role
      new_role = old_role

    self.new_role = new_role

    if old_role == self._ROLE_OFFLINE and new_role != old_role:
      # Trying to transition out of offline status
      result = self.rpc.call_version([node.name])[node.name]
      if result.fail_msg:
        raise errors.OpPrereqError("Node %s is being de-offlined but fails"
                                   " to report its version: %s" %
                                   (node.name, result.fail_msg),
                                   errors.ECODE_STATE)
      else:
        self.LogWarning("Transitioning node from offline to online state"
                        " without using re-add. Please make sure the node"
                        " is healthy!")

    if self.op.secondary_ip:
      # Ok even without locking, because this can't be changed by any LU
      master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
      master_singlehomed = master.secondary_ip == master.primary_ip
      if master_singlehomed and self.op.secondary_ip:
        raise errors.OpPrereqError("Cannot change the secondary ip on a single"
                                   " homed cluster", errors.ECODE_INVAL)

      if node.offline:
        if self.affected_instances:
          # Report the instance names, not the instance objects
          raise errors.OpPrereqError("Cannot change secondary ip: offline"
                                     " node has instances (%s) configured"
                                     " to use it" %
                                     ", ".join([inst.name for inst in
                                                self.affected_instances]),
                                     errors.ECODE_STATE)
      else:
        # On online nodes, check that no instances are running, and that
        # the node has the new ip and we can reach it.
        for instance in self.affected_instances:
          _CheckInstanceDown(self, instance, "cannot change secondary ip")

        _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
        if master.name != node.name:
          # check reachability from master secondary ip to new secondary ip
          if not netutils.TcpPing(self.op.secondary_ip,
                                  constants.DEFAULT_NODED_PORT,
                                  source=master.secondary_ip):
            raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
                                       " based ping to node daemon port",
                                       errors.ECODE_ENVIRON)

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def Exec(self, feedback_fn):
    """Modifies a node.

    @rtype: list of tuples
    @return: (parameter name, new value) pairs describing the changes
        which were applied

    """
    node = self.node
    old_role = self.old_role
    new_role = self.new_role

    result = []

    if self.op.ndparams:
      node.ndparams = self.new_ndparams

    if self.op.powered is not None:
      node.powered = self.op.powered

    for attr in ["master_capable", "vm_capable"]:
      val = getattr(self.op, attr)
      if val is not None:
        setattr(node, attr, val)
        result.append((attr, str(val)))

    if new_role != old_role:
      # Tell the node to demote itself, if no longer MC and not offline
      if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
        msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
        if msg:
          self.LogWarning("Node failed to demote itself: %s", msg)

      # Report only the flags which actually differ from the old ones
      new_flags = self._R2F[new_role]
      for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
        if of != nf:
          result.append((desc, str(nf)))
      (node.master_candidate, node.drained, node.offline) = new_flags

      # we locked all nodes, we adjust the CP before updating this node
      if self.lock_all:
        _AdjustCandidatePool(self, [node.name])

    if self.op.secondary_ip:
      node.secondary_ip = self.op.secondary_ip
      result.append(("secondary_ip", self.op.secondary_ip))

    # this will trigger configuration file update, if needed
    self.cfg.Update(node, feedback_fn)

    # this will trigger job queue propagation or cleanup if the mc
    # flag changed
    if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
      self.context.ReaddNode(node)

    return result
4862
4863
class LUNodePowercycle(NoHooksLU):
  """Logical unit asking a node to powercycle itself.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Expand the node name and refuse the master node unless forced.

    @raise errors.OpPrereqError: if the node is the master node and
        the force parameter was not set

    """
    target = _ExpandNodeName(self.cfg, self.op.node_name)
    self.op.node_name = target
    if target == self.cfg.GetMasterNode():
      if not self.op.force:
        raise errors.OpPrereqError("The node is the master and the force"
                                   " parameter was not set",
                                   errors.ECODE_INVAL)

  def ExpandNames(self):
    """Locking for PowercycleNode.

    Powercycling is a last-resort operation that must not wait for
    other jobs, hence no locks are requested at all.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Reboots a node.

    @return: the payload of the powercycle RPC result

    """
    rpc_result = self.rpc.call_node_powercycle(self.op.node_name,
                                               self.cfg.GetHypervisorType())
    rpc_result.Raise("Failed to schedule the reboot")
    return rpc_result.payload
4894
4895
class LUClusterQuery(NoHooksLU):
  """Logical unit returning the cluster-wide configuration.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declare that no locks are needed.

    """
    self.needed_locks = {}

  def Exec(self, feedback_fn):
    """Return cluster config.

    @rtype: dict
    @return: dictionary with version information and the cluster
        settings; hypervisor parameters (both the cluster-level ones
        and the per-OS ones) are restricted to the enabled hypervisors

    """
    cluster = self.cfg.GetClusterInfo()
    enabled_hvs = cluster.enabled_hypervisors

    # Per-OS hypervisor parameters, only for the enabled hypervisors
    os_hvp = {}
    for (os_name, hv_dict) in cluster.os_hvp.items():
      os_hvp[os_name] = dict((hv_name, hv_params)
                             for (hv_name, hv_params) in hv_dict.items()
                             if hv_name in enabled_hvs)

    # Cluster-level hypervisor parameters, same restriction
    hvparams = dict((hv_name, cluster.hvparams[hv_name])
                    for hv_name in enabled_hvs)

    # The configuration stores an address family, callers get a version
    if cluster.primary_ip_family == netutils.IP6Address.family:
      primary_ip_version = constants.IP6_VERSION
    else:
      primary_ip_version = constants.IP4_VERSION

    return {
      "software_version": constants.RELEASE_VERSION,
      "protocol_version": constants.PROTOCOL_VERSION,
      "config_version": constants.CONFIG_VERSION,
      "os_api_version": max(constants.OS_API_VERSIONS),
      "export_version": constants.EXPORT_VERSION,
      "architecture": (platform.architecture()[0], platform.machine()),
      "name": cluster.cluster_name,
      "master": cluster.master_node,
      "default_hypervisor": enabled_hvs[0],
      "enabled_hypervisors": enabled_hvs,
      "hvparams": hvparams,
      "os_hvp": os_hvp,
      "beparams": cluster.beparams,
      "osparams": cluster.osparams,
      "nicparams": cluster.nicparams,
      "ndparams": cluster.ndparams,
      "candidate_pool_size": cluster.candidate_pool_size,
      "master_netdev": cluster.master_netdev,
      "volume_group_name": cluster.volume_group_name,
      "drbd_usermode_helper": cluster.drbd_usermode_helper,
      "file_storage_dir": cluster.file_storage_dir,
      "shared_file_storage_dir": cluster.shared_file_storage_dir,
      "maintain_node_health": cluster.maintain_node_health,
      "ctime": cluster.ctime,
      "mtime": cluster.mtime,
      "uuid": cluster.uuid,
      "tags": list(cluster.GetTags()),
      "uid_pool": cluster.uid_pool,
      "default_iallocator": cluster.default_iallocator,
      "reserved_lvs": cluster.reserved_lvs,
      "primary_ip_version": primary_ip_version,
      "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
      "hidden_os": cluster.hidden_os,
      "blacklisted_os": cluster.blacklisted_os,
      }
4963
4964
class LUClusterConfigQuery(NoHooksLU):
  """Logical unit returning selected configuration values.

  """
  REQ_BGL = False
  _FIELDS_DYNAMIC = utils.FieldSet()
  _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
                                  "watcher_pause", "volume_group_name")

  def CheckArguments(self):
    """Check the requested output fields against the known ones.

    """
    _CheckOutputFields(static=self._FIELDS_STATIC,
                       dynamic=self._FIELDS_DYNAMIC,
                       selected=self.op.output_fields)

  def ExpandNames(self):
    """Declare that no locks are needed.

    """
    self.needed_locks = {}

  def _ComputeField(self, field):
    """Compute the value of a single output field.

    @type field: string
    @param field: the name of the requested field
    @return: the current value of the field
    @raise errors.ParameterError: if the field is not known

    """
    if field == "cluster_name":
      return self.cfg.GetClusterName()
    if field == "master_node":
      return self.cfg.GetMasterNode()
    if field == "drain_flag":
      return os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
    if field == "watcher_pause":
      return utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
    if field == "volume_group_name":
      return self.cfg.GetVGName()
    raise errors.ParameterError(field)

  def Exec(self, feedback_fn):
    """Collect the values of the requested fields.

    @rtype: list
    @return: the field values, in the order of the requested fields

    """
    return [self._ComputeField(field) for field in self.op.output_fields]
5002
5003
class LUInstanceActivateDisks(NoHooksLU):
  """Logical unit bringing up the disks of an instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are computed later.

    """
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    self.needed_locks[locking.LEVEL_NODE] = []

  def DeclareLocks(self, level):
    """Fill in the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and that its
    primary node is online.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node)

  def Exec(self, feedback_fn):
    """Activate the disks.

    @return: the device information computed by
        L{_AssembleInstanceDisks}
    @raise errors.OpExecError: if the disks could not be assembled

    """
    (disks_ok, disks_info) = \
      _AssembleInstanceDisks(self, self.instance,
                             ignore_size=self.op.ignore_size)
    if disks_ok:
      return disks_info

    raise errors.OpExecError("Cannot activate block devices")
5041
5042
def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
                           ignore_size=False):
  """Prepare the block devices for an instance.

  This sets up the block devices on all nodes, in two passes: first
  every device is assembled in secondary mode on all its nodes, then
  the devices on the primary node are assembled in primary mode.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance for whose disks we assemble
  @type disks: list of L{objects.Disk} or None
  @param disks: which disks to assemble (or all, if None)
  @type ignore_secondaries: boolean
  @param ignore_secondaries: if true, errors during the first
      (secondary mode) pass won't result in an error return from the
      function
  @type ignore_size: boolean
  @param ignore_size: if true, the current known size of the disk
      will not be used during the disk activation, useful for cases
      when the size is wrong
  @rtype: tuple
  @return: a tuple of (disks_ok, device_info); disks_ok is False if
      the operation failed, and device_info is a list of
      (host, instance_visible_name, node_visible_name) tuples
      with the mapping from node devices to instance devices; the
      last element of a tuple is None if the device could not be
      assembled on the primary node

  """
  device_info = []
  disks_ok = True
  iname = instance.name
  disks = _ExpandCheckDisks(instance, disks)

  # With the two passes mechanism we try to reduce the window of
  # opportunity for the race condition of switching DRBD to primary
  # before handshaking occurred, but we do not eliminate it

  # The proper fix would be to wait (with some limits) until the
  # connection has been made and drbd transitions from WFConnection
  # into any other network-connected state (Connected, SyncTarget,
  # SyncSource, etc.)

  # 1st pass, assemble on all nodes in secondary mode
  for idx, inst_disk in enumerate(disks):
    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if ignore_size:
        # work on a copy, presumably so that dropping the size does not
        # touch the disk object we were given
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=False, pass=1): %s",
                           inst_disk.iv_name, node, msg)
        if not ignore_secondaries:
          disks_ok = False

  # FIXME: race condition on drbd migration to primary

  # 2nd pass, do only the primary node
  for idx, inst_disk in enumerate(disks):
    # stays None if the assembly on the primary node fails
    dev_path = None

    for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
      if node != instance.primary_node:
        continue
      if ignore_size:
        node_disk = node_disk.Copy()
        node_disk.UnsetSize()
      lu.cfg.SetDiskID(node_disk, node)
      result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
      msg = result.fail_msg
      if msg:
        lu.proc.LogWarning("Could not prepare block device %s on node %s"
                           " (is_primary=True, pass=2): %s",
                           inst_disk.iv_name, node, msg)
        # failures on the primary node are never ignored
        disks_ok = False
      else:
        dev_path = result.payload

    device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))

  # leave the disks configured for the primary node
  # this is a workaround that would be fixed better by
  # improving the logical/physical id handling
  for disk in disks:
    lu.cfg.SetDiskID(disk, instance.primary_node)

  return disks_ok, device_info
5129
5130
def _StartInstanceDisks(lu, instance, force):
  """Assemble the disks of an instance, or fail.

  If the disks cannot be assembled they are shut down again and an
  error is raised.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks are started
  @param force: passed as C{ignore_secondaries} to
      L{_AssembleInstanceDisks}; if it is None, no hint about
      '--force' is shown on failure
  @raise errors.OpExecError: if the disks could not be assembled

  """
  (assembled, _) = _AssembleInstanceDisks(lu, instance,
                                          ignore_secondaries=force)
  if assembled:
    return

  _ShutdownInstanceDisks(lu, instance)
  # only suggest '--force' when it was explicitly disabled
  if not (force is None or force):
    lu.proc.LogWarning("", hint="If the message above refers to a"
                       " secondary node,"
                       " you can retry the operation using '--force'.")
  raise errors.OpExecError("Disk consistency error")
5144
5145
class LUInstanceDeactivateDisks(NoHooksLU):
  """Logical unit shutting down the disks of an instance.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; its node locks are computed later.

    """
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    self.needed_locks[locking.LEVEL_NODE] = []

  def DeclareLocks(self, level):
    """Fill in the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Deactivate the disks

    With the force option the disks are shut down directly, otherwise
    the instance is checked to be down first.

    """
    if self.op.force:
      shutdown_fn = _ShutdownInstanceDisks
    else:
      shutdown_fn = _SafeShutdownInstanceDisks
    shutdown_fn(self, self.instance)
5180
5181
def _SafeShutdownInstanceDisks(lu, instance, disks=None):
  """Shutdown block devices of an instance which is not running.

  The instance is first checked to be down (via L{_CheckInstanceDown});
  only then are the disks handed over to L{_ShutdownInstanceDisks}.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks are shut down
  @param disks: the disks to shut down (or all, if None)

  """
  _CheckInstanceDown(lu, instance, "cannot shutdown disks")
  _ShutdownInstanceDisks(lu, instance, disks)
5191
5192
def _ExpandCheckDisks(instance, disks):
  """Compute the list of disks to act on.

  @param instance: the instance owning the disks
  @type disks: list of L{objects.Disk} or None
  @param disks: the selected disks, or None for all disks of the
      instance
  @rtype: list of L{objects.Disk}
  @return: all the instance's disks if no selection was made,
      otherwise the selection itself
  @raise errors.ProgrammerError: if a selected disk does not belong
      to the instance

  """
  if disks is None:
    return instance.disks

  if set(disks).issubset(instance.disks):
    return disks

  raise errors.ProgrammerError("Can only act on disks belonging to the"
                               " target instance")
5209
5210
def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
  """Shutdown block devices of an instance.

  This does the shutdown on all nodes of the instance.

  If ignore_primary is true, errors on the primary node are ignored.
  Errors on other nodes are ignored only if the RPC result is marked
  as offline.

  @param lu: the logical unit on whose behalf we execute
  @param instance: the instance whose disks are shut down
  @param disks: the disks to shut down (or all, if None)
  @type ignore_primary: boolean
  @param ignore_primary: whether failures on the primary node should
      not influence the result
  @rtype: boolean
  @return: False if any failure that is not ignored occurred, True
      otherwise

  """
  success = True
  pnode = instance.primary_node

  for disk in _ExpandCheckDisks(instance, disks):
    for (node, top_disk) in disk.ComputeNodeTree(pnode):
      lu.cfg.SetDiskID(top_disk, node)
      result = lu.rpc.call_blockdev_shutdown(node, top_disk)
      msg = result.fail_msg
      if not msg:
        continue

      lu.LogWarning("Could not shutdown block device %s on node %s: %s",
                    disk.iv_name, node, msg)
      if node == pnode:
        if not ignore_primary:
          success = False
      elif not result.offline:
        success = False

  return success
5235
5236
def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
  """Verify that a node has at least the requested free memory.

  The node is queried via RPC; if the query fails, the answer does
  not contain an integer amount of free memory, or the amount is
  lower than requested, an OpPrereqError is raised.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type node: C{str}
  @param node: the node to check
  @type reason: C{str}
  @param reason: string to use in the error message
  @type requested: C{int}
  @param requested: the amount of memory in MiB to check for
  @type hypervisor_name: C{str}
  @param hypervisor_name: the hypervisor to ask for memory stats
  @raise errors.OpPrereqError: if the node doesn't have enough memory, or
      we cannot check the node

  """
  node_result = lu.rpc.call_node_info([node], None, hypervisor_name)[node]
  node_result.Raise("Can't get data from node %s" % node,
                    prereq=True, ecode=errors.ECODE_ENVIRON)

  available = node_result.payload.get("memory_free", None)
  if not isinstance(available, int):
    raise errors.OpPrereqError("Can't compute free memory on node %s, result"
                               " was '%s'" % (node, available),
                               errors.ECODE_ENVIRON)

  if requested > available:
    raise errors.OpPrereqError("Not enough memory on node %s for %s:"
                               " needed %s MiB, available %s MiB" %
                               (node, reason, requested, available),
                               errors.ECODE_NORES)
5272
5273
def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
  """Verify the free disk space of nodes for several volume groups.

  Every volume group in req_sizes is checked on all given nodes by
  L{_CheckNodesFreeDiskOnVG}, which raises an OpPrereqError if a node
  has too little free space or cannot be queried.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type req_sizes: C{dict}
  @param req_sizes: the hash of vg and corresponding amount of disk in
      MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  for vg_name in req_sizes:
    _CheckNodesFreeDiskOnVG(lu, nodenames, vg_name, req_sizes[vg_name])
5295
5296
def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
  """Verify the free disk space of nodes in one volume group.

  All nodes are queried with a single RPC call; an OpPrereqError is
  raised for the first node whose query failed, whose answer does not
  contain an integer amount of free space, or whose free space is
  lower than requested.

  @type lu: C{LogicalUnit}
  @param lu: a logical unit from which we get configuration data
  @type nodenames: C{list}
  @param nodenames: the list of node names to check
  @type vg: C{str}
  @param vg: the volume group to check
  @type requested: C{int}
  @param requested: the amount of disk in MiB to check for
  @raise errors.OpPrereqError: if the node doesn't have enough disk,
      or we cannot check the node

  """
  all_results = lu.rpc.call_node_info(nodenames, vg, None)

  for node in nodenames:
    node_result = all_results[node]
    node_result.Raise("Cannot get current information from node %s" % node,
                      prereq=True, ecode=errors.ECODE_ENVIRON)

    available = node_result.payload.get("vg_free", None)
    if not isinstance(available, int):
      raise errors.OpPrereqError("Can't compute free disk space on node"
                                 " %s for vg %s, result was '%s'" %
                                 (node, vg, available), errors.ECODE_ENVIRON)

    if requested > available:
      raise errors.OpPrereqError("Not enough disk space on target node %s"
                                 " vg %s: required %d MiB, available %d MiB" %
                                 (node, vg, requested, available),
                                 errors.ECODE_NORES)
5332
5333
class LUInstanceStartup(LogicalUnit):
  """Starts an instance.

  """
  HPATH = "instance-start"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the optional backend parameter overrides.

    """
    # extra beparams
    if self.op.beparams:
      # fill the beparams dict
      utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)

  def ExpandNames(self):
    """Declare the locks via L{_ExpandAndLockInstance}.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    @rtype: dict
    @return: the instance hook environment, plus the C{FORCE} flag

    """
    env = {
      "FORCE": self.op.force,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: the same node list (master node plus all nodes of the
        instance) twice

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    If hypervisor parameter overrides were given, they are validated
    on top of the instance's filled parameters. Unless the primary
    node is offline and offline nodes are to be ignored, it also
    checks that the primary node is online, that the instance's
    bridges exist and, if the instance is not running already, that
    the primary node has enough free memory.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    # extra hvparams
    if self.op.hvparams:
      # check hypervisor parameter syntax (locally)
      cluster = self.cfg.GetClusterInfo()
      utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
      # the overrides are applied on top of the filled parameters
      filled_hvp = cluster.FillHV(instance)
      filled_hvp.update(self.op.hvparams)
      hv_type = hypervisor.GetHypervisor(instance.hypervisor)
      hv_type.CheckParameterSyntax(filled_hvp)
      _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)

    # remembered for Exec, which only updates the configuration if the
    # primary node is offline
    self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline

    if self.primary_offline and self.op.ignore_offline_nodes:
      self.proc.LogWarning("Ignoring offline primary node")

      if self.op.hvparams or self.op.beparams:
        self.proc.LogWarning("Overridden parameters are ignored")
    else:
      _CheckNodeOnline(self, instance.primary_node)

      bep = self.cfg.GetClusterInfo().FillBE(instance)

      # check bridges existence
      _CheckInstanceBridgesExist(self, instance)

      remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                instance.name,
                                                instance.hypervisor)
      remote_info.Raise("Error checking node %s" % instance.primary_node,
                        prereq=True, ecode=errors.ECODE_ENVIRON)
      if not remote_info.payload: # not running already
        _CheckNodeFreeMemory(self, instance.primary_node,
                             "starting instance %s" % instance.name,
                             bep[constants.BE_MEMORY], instance.hypervisor)

  def Exec(self, feedback_fn):
    """Start the instance.

    The instance is marked as up in the configuration first. If the
    primary node is offline nothing else is done, otherwise the disks
    are started and the instance is started on its primary node.

    @raise errors.OpExecError: if the instance could not be started

    """
    instance = self.instance
    force = self.op.force

    self.cfg.MarkInstanceUp(instance.name)

    if self.primary_offline:
      # CheckPrereq only lets us get here with an offline primary node
      # if offline nodes are to be ignored
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as started")
    else:
      node_current = instance.primary_node

      _StartInstanceDisks(self, instance, force)

      result = self.rpc.call_instance_start(node_current, instance,
                                            self.op.hvparams, self.op.beparams)
      msg = result.fail_msg
      if msg:
        # do not leave the disks active if the instance did not start
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance: %s" % msg)
5440         raise errors.OpExecError("Could not start instance: %s" % msg)
5441
5442
class LUInstanceReboot(LogicalUnit):
  """Reboot an instance.

  """
  HPATH = "instance-reboot"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the locks via L{_ExpandAndLockInstance}.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    @rtype: dict
    @return: the instance hook environment, plus the reboot options
        of the opcode

    """
    env = {
      "IGNORE_SECONDARIES": self.op.ignore_secondaries,
      "REBOOT_TYPE": self.op.reboot_type,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }

    env.update(_BuildInstanceHookEnvByObject(self, self.instance))

    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: the same node list (master node plus all nodes of the
        instance) twice

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that its primary
    node is online and that its bridges exist.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    _CheckNodeOnline(self, instance.primary_node)

    # check bridges existence
    _CheckInstanceBridgesExist(self, instance)

  def Exec(self, feedback_fn):
    """Reboot the instance.

    A running instance with a soft or hard reboot type is rebooted
    through the reboot RPC on its primary node. In all other cases
    the instance is shut down (if it is running), its disks are
    restarted and the instance is started again. At the end the
    instance is marked as up in the configuration.

    @raise errors.OpExecError: if the instance could not be started
        again during a full reboot

    """
    instance = self.instance
    ignore_secondaries = self.op.ignore_secondaries
    reboot_type = self.op.reboot_type

    remote_info = self.rpc.call_instance_info(instance.primary_node,
                                              instance.name,
                                              instance.hypervisor)
    remote_info.Raise("Error checking node %s" % instance.primary_node)
    # an empty payload means that the instance is not running
    instance_running = bool(remote_info.payload)

    node_current = instance.primary_node

    if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
                                            constants.INSTANCE_REBOOT_HARD]:
      for disk in instance.disks:
        self.cfg.SetDiskID(disk, node_current)
      result = self.rpc.call_instance_reboot(node_current, instance,
                                             reboot_type,
                                             self.op.shutdown_timeout)
      result.Raise("Could not reboot instance")
    else:
      # full reboot, or the instance is not running at all
      if instance_running:
        result = self.rpc.call_instance_shutdown(node_current, instance,
                                                 self.op.shutdown_timeout)
        result.Raise("Could not shutdown instance for full reboot")
        _ShutdownInstanceDisks(self, instance)
      else:
        self.LogInfo("Instance %s was already stopped, starting now",
                     instance.name)
      _StartInstanceDisks(self, instance, ignore_secondaries)
      result = self.rpc.call_instance_start(node_current, instance, None, None)
      msg = result.fail_msg
      if msg:
        # do not leave the disks active if the instance did not start
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance for"
                                 " full reboot: %s" % msg)

    self.cfg.MarkInstanceUp(instance.name)
5534
5535
class LUInstanceShutdown(LogicalUnit):
  """Logical unit stopping an instance.

  """
  HPATH = "instance-stop"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the locks via L{_ExpandAndLockInstance}.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    @rtype: dict
    @return: the instance hook environment, plus the shutdown timeout

    """
    hooks_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hooks_env["TIMEOUT"] = self.op.timeout
    return hooks_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: the same node list (master node plus all nodes of the
        instance) twice

    """
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes += list(self.instance.all_nodes)
    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and, unless
    offline nodes are to be ignored, that its primary node is online.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    self.instance = instance
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    pnode_name = instance.primary_node
    self.primary_offline = self.cfg.GetNodeInfo(pnode_name).offline

    if not self.primary_offline or not self.op.ignore_offline_nodes:
      _CheckNodeOnline(self, pnode_name)
    else:
      self.proc.LogWarning("Ignoring offline primary node")

  def Exec(self, feedback_fn):
    """Shutdown the instance.

    The instance is marked as down in the configuration first. If the
    primary node is offline nothing else is done; otherwise the
    instance is shut down on its primary node (a failure is only
    logged) and its disks are shut down.

    """
    instance = self.instance

    self.cfg.MarkInstanceDown(instance.name)

    if self.primary_offline:
      assert self.op.ignore_offline_nodes
      self.proc.LogInfo("Primary node offline, marked instance as stopped")
      return

    shutdown_result = self.rpc.call_instance_shutdown(instance.primary_node,
                                                      instance,
                                                      self.op.timeout)
    msg = shutdown_result.fail_msg
    if msg:
      self.proc.LogWarning("Could not shutdown instance: %s" % msg)

    _ShutdownInstanceDisks(self, instance)
5602
5603
class LUInstanceReinstall(LogicalUnit):
  """Reinstall an instance.

  """
  HPATH = "instance-reinstall"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Declare the locks via L{_ExpandAndLockInstance}.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    This runs on master, primary and secondary nodes of the instance.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: the same node list (master node plus all nodes of the
        instance) twice

    """
    nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.

    It also checks that all nodes of the instance are online, that the
    instance is not diskless and, if given, that the new OS is usable
    on the primary node and that the new OS parameters are valid.

    @raise errors.OpPrereqError: if the instance has no disks

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
                     " offline, cannot reinstall")
    for node in instance.secondary_nodes:
      _CheckNodeOnline(self, node, "Instance secondary node offline,"
                       " cannot reinstall")

    if instance.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name,
                                 errors.ECODE_INVAL)
    _CheckInstanceDown(self, instance, "cannot reinstall")

    if self.op.os_type is not None:
      # OS verification
      pnode = _ExpandNodeName(self.cfg, instance.primary_node)
      _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
      instance_os = self.op.os_type
    else:
      # no OS change requested, keep the current one
      instance_os = instance.os

    nodelist = list(instance.all_nodes)

    if self.op.osparams:
      # the parameters are checked against the OS that will be installed
      i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
      _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
      self.os_inst = i_osdict # the new dict (without defaults)
    else:
      self.os_inst = None

    self.instance = instance

  def Exec(self, feedback_fn):
    """Reinstall the instance.

    If a new OS was requested, it is saved in the configuration before
    the installation. The disks are then started, the OS creation is
    run on the primary node and the disks are shut down again, whether
    or not the installation succeeded.

    """
    inst = self.instance

    if self.op.os_type is not None:
      feedback_fn("Changing OS to '%s'..." % self.op.os_type)
      inst.os = self.op.os_type
      # Write to configuration
      self.cfg.Update(inst, feedback_fn)

    _StartInstanceDisks(self, inst, None)
    try:
      feedback_fn("Running the instance OS create scripts...")
      # FIXME: pass debug option from opcode to backend
      result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
                                             self.op.debug_level,
                                             osparams=self.os_inst)
      result.Raise("Could not install OS for instance %s on node %s" %
                   (inst.name, inst.primary_node))
    finally:
      # the disks were only activated for the installation
      _ShutdownInstanceDisks(self, inst)
5693
5694
class LUInstanceRecreateDisks(LogicalUnit):
  """Recreate an instance's missing disks.

  The disks to recreate are given as a list of indices in the opcode; an
  empty list selects all disks of the instance.

  """
  HPATH = "instance-recreate-disks"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Delegate name expansion and locking to the instance helper.

    """
    self._ExpandAndLockInstance()

  def BuildHooksEnv(self):
    """Build hooks env.

    The environment is the standard one built from the instance object.

    """
    return _BuildInstanceHookEnvByObject(self, self.instance)

  def BuildHooksNodes(self):
    """Build hooks nodes.

    Pre and post hooks share one list: the master node followed by all
    nodes of the instance.

    """
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running,
    that its primary node is online, that it has disks and that all
    requested disk indices are within range.

    """
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, inst.primary_node)

    if inst.disk_template == constants.DT_DISKLESS:
      raise errors.OpPrereqError("Instance '%s' has no disks" %
                                 self.op.instance_name, errors.ECODE_INVAL)
    _CheckInstanceDown(self, inst, "cannot recreate disks")

    if self.op.disks:
      for disk_idx in self.op.disks:
        if disk_idx >= len(inst.disks):
          raise errors.OpPrereqError("Invalid disk index '%s'" % disk_idx,
                                     errors.ECODE_INVAL)
    else:
      # nothing selected explicitly, hence select every disk
      self.op.disks = range(len(inst.disks))

    self.instance = inst

  def Exec(self, feedback_fn):
    """Recreate the disks.

    Every disk whose index has not been passed in is put on the skip
    list handed to the disk creation helper.

    """
    to_skip = [idx for (idx, _) in enumerate(self.instance.disks)
               if idx not in self.op.disks]

    _CreateDisks(self, self.instance, to_skip=to_skip)
5758
5759
class LUInstanceRename(LogicalUnit):
  """Rename an instance.

  """
  HPATH = "instance-rename"
  HTYPE = constants.HTYPE_INSTANCE

  def CheckArguments(self):
    """Check arguments.

    An IP check can only be requested together with a name check.

    """
    # TODO: make the ip check more flexible and not depend on the name check
    ip_without_name = self.op.ip_check and not self.op.name_check
    if ip_without_name:
      raise errors.OpPrereqError("IP address check requires a name check",
                                 errors.ECODE_INVAL)

  def BuildHooksEnv(self):
    """Build hooks env.

    The standard instance environment is extended with the new name.

    """
    hooks_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hooks_env["INSTANCE_NEW_NAME"] = self.op.new_name
    return hooks_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    Pre and post hooks share one list: the master node followed by all
    nodes of the instance.

    """
    hook_nodes = [self.cfg.GetMasterNode()]
    hook_nodes.extend(self.instance.all_nodes)
    return (hook_nodes, hook_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster and is not running.
    With the name check enabled the new name is resolved and replaced by
    the resolved hostname; with the IP check enabled the resolved address
    must not answer on the node daemon port. Finally, the new name must
    not belong to another instance.

    """
    self.op.instance_name = _ExpandInstanceName(self.cfg,
                                                self.op.instance_name)
    inst = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert inst is not None
    _CheckNodeOnline(self, inst.primary_node)
    _CheckInstanceDown(self, inst, "cannot rename")
    self.instance = inst

    wanted_name = self.op.new_name
    if self.op.name_check:
      resolved = netutils.GetHostname(name=wanted_name)
      self.LogInfo("Resolved given name '%s' to '%s'", wanted_name,
                   resolved.name)
      if not utils.MatchNameComponent(self.op.new_name, [resolved.name]):
        raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
                                    " same as given hostname '%s'") %
                                   (resolved.name, self.op.new_name),
                                   errors.ECODE_INVAL)
      # from now on only the resolved name is used
      wanted_name = self.op.new_name = resolved.name
      if self.op.ip_check:
        if netutils.TcpPing(resolved.ip, constants.DEFAULT_NODED_PORT):
          raise errors.OpPrereqError("IP %s of instance %s already in use" %
                                     (resolved.ip, wanted_name),
                                     errors.ECODE_NOTUNIQUE)

    known_instances = self.cfg.GetInstanceList()
    if wanted_name != inst.name and wanted_name in known_instances:
      raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
                                 wanted_name, errors.ECODE_EXISTS)

  def Exec(self, feedback_fn):
    """Rename the instance.

    The instance is renamed in the configuration and its lock is replaced
    by one carrying the new name. For file-based disk templates the
    directory of the first disk is renamed on the primary node. The OS
    rename script is then run with the disks activated; a failure of the
    script is only logged as a warning.

    @return: the name of the instance as re-read from the configuration

    """
    old_inst = self.instance
    old_name = old_inst.name
    new_name = self.op.new_name

    file_based = old_inst.disk_template in (constants.DT_FILE,
                                            constants.DT_SHARED_FILE)
    rename_file_storage = file_based and new_name != old_name
    if rename_file_storage:
      old_file_storage_dir = os.path.dirname(old_inst.disks[0].logical_id[1])

    self.cfg.RenameInstance(old_name, new_name)
    # Change the instance lock. This is definitely safe while we hold the BGL.
    # Otherwise the new lock would have to be added in acquired mode.
    assert self.REQ_BGL
    self.context.glm.remove(locking.LEVEL_INSTANCE, old_name)
    self.context.glm.add(locking.LEVEL_INSTANCE, new_name)

    # re-read the instance from the configuration after rename
    renamed = self.cfg.GetInstanceInfo(new_name)

    if rename_file_storage:
      new_file_storage_dir = os.path.dirname(renamed.disks[0].logical_id[1])
      dir_result = self.rpc.call_file_storage_dir_rename(renamed.primary_node,
                                                         old_file_storage_dir,
                                                         new_file_storage_dir)
      dir_result.Raise("Could not rename on node %s directory '%s' to '%s'"
                       " (but the instance has been renamed in Ganeti)" %
                       (renamed.primary_node, old_file_storage_dir,
                        new_file_storage_dir))

    _StartInstanceDisks(self, renamed, None)
    try:
      script_result = self.rpc.call_instance_run_rename(renamed.primary_node,
                                                        renamed, old_name,
                                                        self.op.debug_level)
      failure = script_result.fail_msg
      if failure:
        warning = ("Could not run OS rename script for instance %s on node %s"
                   " (but the instance has been renamed in Ganeti): %s" %
                   (renamed.name, renamed.primary_node, failure))
        self.proc.LogWarning(warning)
    finally:
      _ShutdownInstanceDisks(self, renamed)

    return renamed.name
5876
5877
class LUInstanceRemove(LogicalUnit):
  """Remove an instance.

  """
  HPATH = "instance-remove"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and prepare the node lock recalculation.

    """
    self._ExpandAndLockInstance()
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
    self.needed_locks[locking.LEVEL_NODE] = []

  def DeclareLocks(self, level):
    """Compute the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The standard instance environment is extended with the shutdown
    timeout.

    """
    hooks_env = _BuildInstanceHookEnvByObject(self, self.instance)
    hooks_env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
    return hooks_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    Pre hooks run on the master node only; post hooks run on all nodes
    of the instance followed by the master node.

    """
    master_only = [self.cfg.GetMasterNode()]
    post_nodes = list(self.instance.all_nodes) + master_only
    return (master_only, post_nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

  def Exec(self, feedback_fn):
    """Remove the instance.

    The instance is shut down on its primary node first; a failed
    shutdown aborts the removal unless failures are to be ignored, in
    which case only a warning is sent.

    """
    inst = self.instance
    pnode = inst.primary_node
    logging.info("Shutting down instance %s on node %s", inst.name, pnode)

    shutdown = self.rpc.call_instance_shutdown(pnode, inst,
                                               self.op.shutdown_timeout)
    failure = shutdown.fail_msg
    if failure:
      if not self.op.ignore_failures:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (inst.name, inst.primary_node, failure))
      feedback_fn("Warning: can't shutdown instance: %s" % failure)

    _RemoveInstance(self, feedback_fn, inst, self.op.ignore_failures)
5943
5944
def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
  """Utility function to remove an instance.

  The disks of the instance are removed, the instance is deleted from
  the cluster configuration and its name is recorded in the removal
  locks of the logical unit.

  @param lu: the logical unit on whose behalf we execute
  @param feedback_fn: function used to send warnings
  @param instance: the instance object to remove
  @param ignore_failures: if true, a failed disk removal only causes a
      warning instead of an error
  @raise errors.OpExecError: if the disks cannot be removed and
      failures are not ignored

  """
  logging.info("Removing block devices for instance %s", instance.name)

  disks_removed = _RemoveDisks(lu, instance)
  if not disks_removed:
    if ignore_failures:
      feedback_fn("Warning: can't remove instance's disks")
    else:
      raise errors.OpExecError("Can't remove instance's disks")

  logging.info("Removing instance %s out of cluster config", instance.name)

  lu.cfg.RemoveInstance(instance.name)

  pending_removal = lu.remove_locks.get(locking.LEVEL_INSTANCE)
  assert not pending_removal, "Instance lock removal conflict"

  # Remove lock for the instance
  lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
5965
5966
class LUInstanceQuery(NoHooksLU):
  """Logical unit for querying instances.

  All the work is delegated to an C{_InstanceQuery} object built from
  the opcode parameters.

  """
  # pylint: disable-msg=W0142
  REQ_BGL = False

  def CheckArguments(self):
    """Create the query object for the requested names and fields.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.iq = _InstanceQuery(name_filter, self.op.output_fields,
                             self.op.use_locking)

  def ExpandNames(self):
    """Let the query object expand the names.

    """
    query_obj = self.iq
    query_obj.ExpandNames(self)

  def DeclareLocks(self, level):
    """Let the query object declare the locks for the given level.

    """
    query_obj = self.iq
    query_obj.DeclareLocks(self, level)

  def Exec(self, feedback_fn):
    """Run the query and return its old-style result.

    """
    query_obj = self.iq
    return query_obj.OldStyleQuery(self)
5986
5987
class LUInstanceFailover(LogicalUnit):
  """Failover an instance.

  The actual work is done by a single L{TLMigrateInstance} tasklet
  created with C{failover=True}.

  """
  HPATH = "instance-failover"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the arguments.

    Copies the (possibly missing) iallocator and target node from the
    opcode onto the logical unit.

    """
    for attr_name in ("iallocator", "target_node"):
      setattr(self, attr_name, getattr(self.op, attr_name, None))

  def ExpandNames(self):
    """Lock the instance, expand the target node, create the tasklet.

    """
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self._migrater = \
      TLMigrateInstance(self, self.op.instance_name, cleanup=False,
                        iallocator=self.op.iallocator,
                        target_node=self.op.target_node, failover=True,
                        ignore_consistency=self.op.ignore_consistency,
                        shutdown_timeout=self.op.shutdown_timeout)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    """Declare the node locks.

    For externally mirrored disk templates either all nodes (no target
    node given) or the primary and the target node are locked, and the
    lock recalculation is switched off; otherwise the nodes of the
    instance are locked.

    """
    if level != locking.LEVEL_NODE:
      return

    instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
    if instance.disk_template not in constants.DTS_EXT_MIRROR:
      self._LockInstancesNodes()
      return

    if self.op.target_node is None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                               self.op.target_node]
    del self.recalculate_locks[locking.LEVEL_NODE]

  def BuildHooksEnv(self):
    """Build hooks env.

    Besides the standard instance environment this exports the failover
    options and the old/new primary and secondary nodes; the secondary
    entries are empty strings unless the disk template is internally
    mirrored.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self._migrater.target_node

    if instance.disk_template in constants.DTS_INT_MIRROR:
      old_secondary = instance.secondary_nodes[0]
      new_secondary = source_node
    else:
      old_secondary = new_secondary = ""

    hooks_env = {
      "IGNORE_CONSISTENCY": self.op.ignore_consistency,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      "OLD_PRIMARY": source_node,
      "NEW_PRIMARY": target_node,
      "OLD_SECONDARY": old_secondary,
      "NEW_SECONDARY": new_secondary,
      }
    hooks_env.update(_BuildInstanceHookEnvByObject(self, instance))

    return hooks_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    Pre hooks run on the master node and the secondary nodes of the
    instance; post hooks additionally run on the primary node.

    """
    instance = self._migrater.instance
    pre_nodes = [self.cfg.GetMasterNode()]
    pre_nodes.extend(instance.secondary_nodes)
    post_nodes = pre_nodes + [instance.primary_node]
    return (pre_nodes, post_nodes)
6069
6070
class LUInstanceMigrate(LogicalUnit):
  """Migrate an instance.

  This is migration without shutting down, compared to the failover,
  which is done with shutdown. The actual work is done by a single
  L{TLMigrateInstance} tasklet created with C{failover=False}.

  """
  HPATH = "instance-migrate"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance, expand the target node, create the tasklet.

    """
    self._ExpandAndLockInstance()

    if self.op.target_node is not None:
      self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)

    self.needed_locks[locking.LEVEL_NODE] = []
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

    self._migrater = \
      TLMigrateInstance(self, self.op.instance_name,
                        cleanup=self.op.cleanup,
                        iallocator=self.op.iallocator,
                        target_node=self.op.target_node,
                        failover=False,
                        fallback=self.op.allow_failover)
    self.tasklets = [self._migrater]

  def DeclareLocks(self, level):
    """Declare the node locks.

    For externally mirrored disk templates either all nodes (no target
    node given) or the primary and the target node are locked, and the
    lock recalculation is switched off; otherwise the nodes of the
    instance are locked.

    """
    if level != locking.LEVEL_NODE:
      return

    instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
    if instance.disk_template not in constants.DTS_EXT_MIRROR:
      self._LockInstancesNodes()
      return

    if self.op.target_node is None:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
                                               self.op.target_node]
    del self.recalculate_locks[locking.LEVEL_NODE]

  def BuildHooksEnv(self):
    """Build hooks env.

    Besides the standard instance environment this exports the migration
    options and the old/new primary and secondary nodes; the secondary
    entries are None unless the disk template is internally mirrored.

    """
    instance = self._migrater.instance
    source_node = instance.primary_node
    target_node = self._migrater.target_node
    hooks_env = _BuildInstanceHookEnvByObject(self, instance)

    hooks_env["MIGRATE_LIVE"] = self._migrater.live
    hooks_env["MIGRATE_CLEANUP"] = self.op.cleanup
    hooks_env["OLD_PRIMARY"] = source_node
    hooks_env["NEW_PRIMARY"] = target_node

    if instance.disk_template in constants.DTS_INT_MIRROR:
      (old_secondary, new_secondary) = (target_node, source_node)
    else:
      (old_secondary, new_secondary) = (None, None)
    hooks_env["OLD_SECONDARY"] = old_secondary
    hooks_env["NEW_SECONDARY"] = new_secondary

    return hooks_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    Pre hooks run on the master node and the secondary nodes of the
    instance; post hooks additionally run on the primary node.

    """
    instance = self._migrater.instance
    pre_nodes = [self.cfg.GetMasterNode()]
    pre_nodes.extend(instance.secondary_nodes)
    post_nodes = pre_nodes + [instance.primary_node]
    return (pre_nodes, post_nodes)
6144
6145
class LUInstanceMove(LogicalUnit):
  """Move an instance by data-copying.

  The instance is shut down on its current primary node, new disks are
  created on the target node, the data is copied over disk by disk and
  the target node becomes the new primary node. Only disks of type
  C{LD_LV} or C{LD_FILE} can be copied.

  """
  HPATH = "instance-move"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance and the (expanded) target node.

    """
    self._ExpandAndLockInstance()
    target_node = _ExpandNodeName(self.cfg, self.op.target_node)
    self.op.target_node = target_node
    self.needed_locks[locking.LEVEL_NODE] = [target_node]
    # the instance's primary node is appended in DeclareLocks
    self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

  def DeclareLocks(self, level):
    """Add the instance's primary node to the node locks.

    """
    if level == locking.LEVEL_NODE:
      self._LockInstancesNodes(primary_only=True)

  def BuildHooksEnv(self):
    """Build hooks env.

    The standard instance environment is extended with the target node
    and the shutdown timeout.

    """
    env = {
      "TARGET_NODE": self.op.target_node,
      "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
      }
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    Both phases run on the master node, the instance's primary node and
    the target node.

    """
    nl = [
      self.cfg.GetMasterNode(),
      self.instance.primary_node,
      self.op.target_node,
      ]
    return (nl, nl)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that the target
    node differs from the current primary node, that all disks have a
    simple (copyable) layout and that the target node is able to take
    the instance.

    """
    self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    node = self.cfg.GetNodeInfo(self.op.target_node)
    assert node is not None, \
      "Cannot retrieve locked node %s" % self.op.target_node

    self.target_node = target_node = node.name

    if target_node == instance.primary_node:
      raise errors.OpPrereqError("Instance %s is already on the node %s" %
                                 (instance.name, target_node),
                                 errors.ECODE_STATE)

    bep = self.cfg.GetClusterInfo().FillBE(instance)

    # only plain logical volumes and files can be copied
    for idx, dsk in enumerate(instance.disks):
      if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
        raise errors.OpPrereqError("Instance disk %d has a complex layout,"
                                   " cannot copy" % idx, errors.ECODE_STATE)

    _CheckNodeOnline(self, target_node)
    _CheckNodeNotDrained(self, target_node)
    _CheckNodeVmCapable(self, target_node)

    if instance.admin_up:
      # check memory requirements on the target node
      # NOTE(review): the "failing over" reason text and the "secondary
      # node" log message below look copied from the failover code
      _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
                           instance.name, bep[constants.BE_MEMORY],
                           instance.hypervisor)
    else:
      self.LogInfo("Not checking memory on the secondary node as"
                   " instance will not be started")

    # check bridge existence
    _CheckInstanceBridgesExist(self, instance, node=target_node)

  def Exec(self, feedback_fn):
    """Move an instance.

    The move is done by shutting it down on its present node, copying
    the data over (slow) and starting it on the new node.

    @raise errors.OpExecError: if the shutdown fails (and consistency is
        not ignored), if the disk copy fails or if the instance cannot
        be started on the target node

    """
    instance = self.instance

    source_node = instance.primary_node
    target_node = self.target_node

    self.LogInfo("Shutting down instance %s on source node %s",
                 instance.name, source_node)

    result = self.rpc.call_instance_shutdown(source_node, instance,
                                             self.op.shutdown_timeout)
    msg = result.fail_msg
    if msg:
      if self.op.ignore_consistency:
        self.proc.LogWarning("Could not shutdown instance %s on node %s."
                             " Proceeding anyway. Please make sure node"
                             " %s is down. Error details: %s",
                             instance.name, source_node, source_node, msg)
      else:
        raise errors.OpExecError("Could not shutdown instance %s on"
                                 " node %s: %s" %
                                 (instance.name, source_node, msg))

    # create the target disks
    try:
      _CreateDisks(self, instance, target_node=target_node)
    except errors.OpExecError:
      self.LogWarning("Device creation failed, reverting...")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        # runs even if the disk removal fails; the bare raise then
        # propagates the error to the caller
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise

    cluster_name = self.cfg.GetClusterInfo().cluster_name

    errs = []
    # activate, get path, copy the data over
    for idx, disk in enumerate(instance.disks):
      self.LogInfo("Copying data for disk %d", idx)
      result = self.rpc.call_blockdev_assemble(target_node, disk,
                                               instance.name, True, idx)
      if result.fail_msg:
        self.LogWarning("Can't assemble newly created disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        # the first failure stops the copy, remaining disks are skipped
        break
      # the assemble call returns the device path on the target node
      dev_path = result.payload
      result = self.rpc.call_blockdev_export(source_node, disk,
                                             target_node, dev_path,
                                             cluster_name)
      if result.fail_msg:
        self.LogWarning("Can't copy data over for disk %d: %s",
                        idx, result.fail_msg)
        errs.append(result.fail_msg)
        break

    if errs:
      self.LogWarning("Some disks failed to copy, aborting")
      try:
        _RemoveDisks(self, instance, target_node=target_node)
      finally:
        self.cfg.ReleaseDRBDMinors(instance.name)
        # NOTE(review): raising from within "finally" replaces any
        # exception raised by _RemoveDisks above
        raise errors.OpExecError("Errors during disk copy: %s" %
                                 (",".join(errs),))

    # the copy succeeded, make the target node the new primary node
    instance.primary_node = target_node
    self.cfg.Update(instance, feedback_fn)

    self.LogInfo("Removing the disks on the original node")
    _RemoveDisks(self, instance, target_node=source_node)

    # Only start the instance if it's marked as up
    if instance.admin_up:
      self.LogInfo("Starting instance %s on node %s",
                   instance.name, target_node)

      disks_ok, _ = _AssembleInstanceDisks(self, instance,
                                           ignore_secondaries=True)
      if not disks_ok:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Can't activate the instance's disks")

      result = self.rpc.call_instance_start(target_node, instance, None, None)
      msg = result.fail_msg
      if msg:
        _ShutdownInstanceDisks(self, instance)
        raise errors.OpExecError("Could not start instance %s on node %s: %s" %
                                 (instance.name, target_node, msg))
6328
6329
class LUNodeMigrate(LogicalUnit):
  """Migrate all instances from a node.

  One L{TLMigrateInstance} tasklet is created for every instance which
  has the given node as its primary node.

  """
  HPATH = "node-migrate"
  HTYPE = constants.HTYPE_NODE
  REQ_BGL = False

  def CheckArguments(self):
    """Check the iallocator/remote node arguments of the opcode.

    """
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    """Expand the node name, create the tasklets and declare the locks.

    All nodes are locked if at least one instance uses an externally
    mirrored disk template; otherwise only the node itself is declared
    here and the nodes of the instances are added in L{DeclareLocks}.

    """
    self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)

    self.needed_locks = {}

    # Create tasklets for migrating instances for all instances on this node
    names = []
    tasklets = []

    self.lock_all_nodes = False

    for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name):
      logging.debug("Migrating instance %s", inst.name)
      names.append(inst.name)

      # The keyword has to match the "target_node" parameter of
      # TLMigrateInstance.__init__; a misspelled keyword makes the call
      # fail with TypeError
      tasklets.append(TLMigrateInstance(self, inst.name, cleanup=False,
                                        iallocator=self.op.iallocator,
                                        target_node=None))

      if inst.disk_template in constants.DTS_EXT_MIRROR:
        # We need to lock all nodes, as the iallocator will choose the
        # destination nodes afterwards
        self.lock_all_nodes = True

    self.tasklets = tasklets

    # Declare node locks
    if self.lock_all_nodes:
      self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.needed_locks[locking.LEVEL_NODE] = [self.op.node_name]
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND

    # Declare instance locks
    self.needed_locks[locking.LEVEL_INSTANCE] = names

  def DeclareLocks(self, level):
    """Add the nodes of the locked instances, unless all nodes are locked.

    """
    if level == locking.LEVEL_NODE and not self.lock_all_nodes:
      self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The environment only contains the name of the node; the hooks run on
    the master node only (see L{BuildHooksNodes}).

    """
    return {
      "NODE_NAME": self.op.node_name,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    Both phases run on the master node only.

    """
    nl = [self.cfg.GetMasterNode()]
    return (nl, nl)
6397
6398
6399 class TLMigrateInstance(Tasklet):
6400   """Tasklet class for instance migration.
6401
  @type live: boolean
  @ivar live: whether the migration will be done live or non-live;
      this variable is initialized only after CheckPrereq has run
  @type cleanup: boolean
  @ivar cleanup: Whether we cleanup from a failed migration
  @type iallocator: string
  @ivar iallocator: The iallocator used to determine target_node
  @type target_node: string
  @ivar target_node: If given, the target_node to reallocate the instance to
  @type failover: boolean
  @ivar failover: Whether operation results in failover or migration
  @type fallback: boolean
  @ivar fallback: Whether fallback to failover is allowed if migration not
                  possible
  @type ignore_consistency: boolean
  @ivar ignore_consistency: Whether we should ignore consistency between source
                            and target node
  @type shutdown_timeout: int
  @ivar shutdown_timeout: In case of failover timeout of the shutdown
6421
6422   """
6423   def __init__(self, lu, instance_name, cleanup=False, iallocator=None,
6424                target_node=None, failover=False, fallback=False,
6425                ignore_consistency=False,
6426                shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
6427     """Initializes this class.
6428
6429     """
6430     Tasklet.__init__(self, lu)
6431
6432     # Parameters
6433     self.instance_name = instance_name
6434     self.cleanup = cleanup
6435     self.live = False # will be overridden later
6436     self.iallocator = iallocator
6437     self.target_node = target_node
6438     self.failover = failover
6439     self.fallback = fallback
6440     self.ignore_consistency = ignore_consistency
6441     self.shutdown_timeout = shutdown_timeout
6442
6443   def CheckPrereq(self):
6444     """Check prerequisites.
6445
6446     This checks that the instance is in the cluster.
6447
6448     """
6449     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6450     instance = self.cfg.GetInstanceInfo(instance_name)
6451     assert instance is not None
6452     self.instance = instance
6453
6454     if (not self.cleanup and not instance.admin_up and not self.failover and
6455         self.fallback):
6456       self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
6457                       " to failover")
6458       self.failover = True
6459
6460     if instance.disk_template not in constants.DTS_MIRRORED:
6461       if self.failover:
6462         text = "failovers"
6463       else:
6464         text = "migrations"
6465       raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
6466                                  " %s" % (instance.disk_template, text),
6467                                  errors.ECODE_STATE)
6468
6469     if instance.disk_template in constants.DTS_EXT_MIRROR:
6470       _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
6471
6472       if self.iallocator:
6473         self._RunAllocator()
6474
6475       # self.target_node is already populated, either directly or by the
6476       # iallocator run
6477       target_node = self.target_node
6478
6479       if len(self.lu.tasklets) == 1:
6480         # It is safe to release locks only when we're the only tasklet in the LU
6481         _ReleaseLocks(self, locking.LEVEL_NODE,
6482                       keep=[instance.primary_node, self.target_node])
6483
6484     else:
6485       secondary_nodes = instance.secondary_nodes
6486       if not secondary_nodes:
6487         raise errors.ConfigurationError("No secondary node but using"
6488                                         " %s disk template" %
6489                                         instance.disk_template)
6490       target_node = secondary_nodes[0]
6491       if self.iallocator or (self.target_node and
6492                              self.target_node != target_node):
6493         if self.failover:
6494           text = "failed over"
6495         else:
6496           text = "migrated"
6497         raise errors.OpPrereqError("Instances with disk template %s cannot"
6498                                    " be %s to arbitrary nodes"
6499                                    " (neither an iallocator nor a target"
6500                                    " node can be passed)" %
6501                                    (instance.disk_template, text),
6502                                    errors.ECODE_INVAL)
6503
6504     i_be = self.cfg.GetClusterInfo().FillBE(instance)
6505
6506     # check memory requirements on the secondary node
6507     if not self.failover or instance.admin_up:
6508       _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
6509                            instance.name, i_be[constants.BE_MEMORY],
6510                            instance.hypervisor)
6511     else:
6512       self.lu.LogInfo("Not checking memory on the secondary node as"
6513                       " instance will not be started")
6514
6515     # check bridge existance
6516     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
6517
6518     if not self.cleanup:
6519       _CheckNodeNotDrained(self.lu, target_node)
6520       if not self.failover:
6521         result = self.rpc.call_instance_migratable(instance.primary_node,
6522                                                    instance)
6523         if result.fail_msg and self.fallback:
6524           self.lu.LogInfo("Can't migrate, instance offline, fallback to"
6525                           " failover")
6526           self.failover = True
6527         else:
6528           result.Raise("Can't migrate, please use failover",
6529                        prereq=True, ecode=errors.ECODE_STATE)
6530
6531     assert not (self.failover and self.cleanup)
6532
6533   def _RunAllocator(self):
6534     """Run the allocator based on input opcode.
6535
6536     """
6537     ial = IAllocator(self.cfg, self.rpc,
6538                      mode=constants.IALLOCATOR_MODE_RELOC,
6539                      name=self.instance_name,
6540                      # TODO See why hail breaks with a single node below
6541                      relocate_from=[self.instance.primary_node,
6542                                     self.instance.primary_node],
6543                      )
6544
6545     ial.Run(self.iallocator)
6546
6547     if not ial.success:
6548       raise errors.OpPrereqError("Can't compute nodes using"
6549                                  " iallocator '%s': %s" %
6550                                  (self.iallocator, ial.info),
6551                                  errors.ECODE_NORES)
6552     if len(ial.result) != ial.required_nodes:
6553       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
6554                                  " of nodes (%s), required %s" %
6555                                  (self.iallocator, len(ial.result),
6556                                   ial.required_nodes), errors.ECODE_FAULT)
6557     self.target_node = ial.result[0]
6558     self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
6559                  self.instance_name, self.iallocator,
6560                  utils.CommaJoin(ial.result))
6561
6562     if not self.failover:
6563       if self.lu.op.live is not None and self.lu.op.mode is not None:
6564         raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
6565                                    " parameters are accepted",
6566                                    errors.ECODE_INVAL)
6567       if self.lu.op.live is not None:
6568         if self.lu.op.live:
6569           self.lu.op.mode = constants.HT_MIGRATION_LIVE
6570         else:
6571           self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
6572         # reset the 'live' parameter to None so that repeated
6573         # invocations of CheckPrereq do not raise an exception
6574         self.lu.op.live = None
6575       elif self.lu.op.mode is None:
6576         # read the default value from the hypervisor
6577         i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
6578                                                 skip_globals=False)
6579         self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
6580
6581       self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
6582     else:
6583       # Failover is never live
6584       self.live = False
6585
6586   def _WaitUntilSync(self):
6587     """Poll with custom rpc for disk sync.
6588
6589     This uses our own step-based rpc call.
6590
6591     """
6592     self.feedback_fn("* wait until resync is done")
6593     all_done = False
6594     while not all_done:
6595       all_done = True
6596       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
6597                                             self.nodes_ip,
6598                                             self.instance.disks)
6599       min_percent = 100
6600       for node, nres in result.items():
6601         nres.Raise("Cannot resync disks on node %s" % node)
6602         node_done, node_percent = nres.payload
6603         all_done = all_done and node_done
6604         if node_percent is not None:
6605           min_percent = min(min_percent, node_percent)
6606       if not all_done:
6607         if min_percent < 100:
6608           self.feedback_fn("   - progress: %.1f%%" % min_percent)
6609         time.sleep(2)
6610
6611   def _EnsureSecondary(self, node):
6612     """Demote a node to secondary.
6613
6614     """
6615     self.feedback_fn("* switching node %s to secondary mode" % node)
6616
6617     for dev in self.instance.disks:
6618       self.cfg.SetDiskID(dev, node)
6619
6620     result = self.rpc.call_blockdev_close(node, self.instance.name,
6621                                           self.instance.disks)
6622     result.Raise("Cannot change disk to secondary on node %s" % node)
6623
6624   def _GoStandalone(self):
6625     """Disconnect from the network.
6626
6627     """
6628     self.feedback_fn("* changing into standalone mode")
6629     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
6630                                                self.instance.disks)
6631     for node, nres in result.items():
6632       nres.Raise("Cannot disconnect disks node %s" % node)
6633
6634   def _GoReconnect(self, multimaster):
6635     """Reconnect to the network.
6636
6637     """
6638     if multimaster:
6639       msg = "dual-master"
6640     else:
6641       msg = "single-master"
6642     self.feedback_fn("* changing disks into %s mode" % msg)
6643     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
6644                                            self.instance.disks,
6645                                            self.instance.name, multimaster)
6646     for node, nres in result.items():
6647       nres.Raise("Cannot change disks config on node %s" % node)
6648
6649   def _ExecCleanup(self):
6650     """Try to cleanup after a failed migration.
6651
6652     The cleanup is done by:
6653       - check that the instance is running only on one node
6654         (and update the config if needed)
6655       - change disks on its secondary node to secondary
6656       - wait until disks are fully synchronized
6657       - disconnect from the network
6658       - change disks into single-master mode
6659       - wait again until disks are fully synchronized
6660
6661     """
6662     instance = self.instance
6663     target_node = self.target_node
6664     source_node = self.source_node
6665
6666     # check running on only one node
6667     self.feedback_fn("* checking where the instance actually runs"
6668                      " (if this hangs, the hypervisor might be in"
6669                      " a bad state)")
6670     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
6671     for node, result in ins_l.items():
6672       result.Raise("Can't contact node %s" % node)
6673
6674     runningon_source = instance.name in ins_l[source_node].payload
6675     runningon_target = instance.name in ins_l[target_node].payload
6676
6677     if runningon_source and runningon_target:
6678       raise errors.OpExecError("Instance seems to be running on two nodes,"
6679                                " or the hypervisor is confused; you will have"
6680                                " to ensure manually that it runs only on one"
6681                                " and restart this operation")
6682
6683     if not (runningon_source or runningon_target):
6684       raise errors.OpExecError("Instance does not seem to be running at all;"
6685                                " in this case it's safer to repair by"
6686                                " running 'gnt-instance stop' to ensure disk"
6687                                " shutdown, and then restarting it")
6688
6689     if runningon_target:
6690       # the migration has actually succeeded, we need to update the config
6691       self.feedback_fn("* instance running on secondary node (%s),"
6692                        " updating config" % target_node)
6693       instance.primary_node = target_node
6694       self.cfg.Update(instance, self.feedback_fn)
6695       demoted_node = source_node
6696     else:
6697       self.feedback_fn("* instance confirmed to be running on its"
6698                        " primary node (%s)" % source_node)
6699       demoted_node = target_node
6700
6701     if instance.disk_template in constants.DTS_INT_MIRROR:
6702       self._EnsureSecondary(demoted_node)
6703       try:
6704         self._WaitUntilSync()
6705       except errors.OpExecError:
6706         # we ignore here errors, since if the device is standalone, it
6707         # won't be able to sync
6708         pass
6709       self._GoStandalone()
6710       self._GoReconnect(False)
6711       self._WaitUntilSync()
6712
6713     self.feedback_fn("* done")
6714
6715   def _RevertDiskStatus(self):
6716     """Try to revert the disk status after a failed migration.
6717
6718     """
6719     target_node = self.target_node
6720     if self.instance.disk_template in constants.DTS_EXT_MIRROR:
6721       return
6722
6723     try:
6724       self._EnsureSecondary(target_node)
6725       self._GoStandalone()
6726       self._GoReconnect(False)
6727       self._WaitUntilSync()
6728     except errors.OpExecError, err:
6729       self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
6730                          " please try to recover the instance manually;"
6731                          " error '%s'" % str(err))
6732
6733   def _AbortMigration(self):
6734     """Call the hypervisor code to abort a started migration.
6735
6736     """
6737     instance = self.instance
6738     target_node = self.target_node
6739     migration_info = self.migration_info
6740
6741     abort_result = self.rpc.call_finalize_migration(target_node,
6742                                                     instance,
6743                                                     migration_info,
6744                                                     False)
6745     abort_msg = abort_result.fail_msg
6746     if abort_msg:
6747       logging.error("Aborting migration failed on target node %s: %s",
6748                     target_node, abort_msg)
6749       # Don't raise an exception here, as we stil have to try to revert the
6750       # disk status, even if this step failed.
6751
6752   def _ExecMigration(self):
6753     """Migrate an instance.
6754
6755     The migrate is done by:
6756       - change the disks into dual-master mode
6757       - wait until disks are fully synchronized again
6758       - migrate the instance
6759       - change disks on the new secondary node (the old primary) to secondary
6760       - wait until disks are fully synchronized
6761       - change disks into single-master mode
6762
6763     """
6764     instance = self.instance
6765     target_node = self.target_node
6766     source_node = self.source_node
6767
6768     self.feedback_fn("* checking disk consistency between source and target")
6769     for dev in instance.disks:
6770       if not _CheckDiskConsistency(self.lu, dev, target_node, False):
6771         raise errors.OpExecError("Disk %s is degraded or not fully"
6772                                  " synchronized on target node,"
6773                                  " aborting migration" % dev.iv_name)
6774
6775     # First get the migration information from the remote node
6776     result = self.rpc.call_migration_info(source_node, instance)
6777     msg = result.fail_msg
6778     if msg:
6779       log_err = ("Failed fetching source migration information from %s: %s" %
6780                  (source_node, msg))
6781       logging.error(log_err)
6782       raise errors.OpExecError(log_err)
6783
6784     self.migration_info = migration_info = result.payload
6785
6786     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
6787       # Then switch the disks to master/master mode
6788       self._EnsureSecondary(target_node)
6789       self._GoStandalone()
6790       self._GoReconnect(True)
6791       self._WaitUntilSync()
6792
6793     self.feedback_fn("* preparing %s to accept the instance" % target_node)
6794     result = self.rpc.call_accept_instance(target_node,
6795                                            instance,
6796                                            migration_info,
6797                                            self.nodes_ip[target_node])
6798
6799     msg = result.fail_msg
6800     if msg:
6801       logging.error("Instance pre-migration failed, trying to revert"
6802                     " disk status: %s", msg)
6803       self.feedback_fn("Pre-migration failed, aborting")
6804       self._AbortMigration()
6805       self._RevertDiskStatus()
6806       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
6807                                (instance.name, msg))
6808
6809     self.feedback_fn("* migrating instance to %s" % target_node)
6810     result = self.rpc.call_instance_migrate(source_node, instance,
6811                                             self.nodes_ip[target_node],
6812                                             self.live)
6813     msg = result.fail_msg
6814     if msg:
6815       logging.error("Instance migration failed, trying to revert"
6816                     " disk status: %s", msg)
6817       self.feedback_fn("Migration failed, aborting")
6818       self._AbortMigration()
6819       self._RevertDiskStatus()
6820       raise errors.OpExecError("Could not migrate instance %s: %s" %
6821                                (instance.name, msg))
6822
6823     instance.primary_node = target_node
6824     # distribute new instance config to the other nodes
6825     self.cfg.Update(instance, self.feedback_fn)
6826
6827     result = self.rpc.call_finalize_migration(target_node,
6828                                               instance,
6829                                               migration_info,
6830                                               True)
6831     msg = result.fail_msg
6832     if msg:
6833       logging.error("Instance migration succeeded, but finalization failed:"
6834                     " %s", msg)
6835       raise errors.OpExecError("Could not finalize instance migration: %s" %
6836                                msg)
6837
6838     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
6839       self._EnsureSecondary(source_node)
6840       self._WaitUntilSync()
6841       self._GoStandalone()
6842       self._GoReconnect(False)
6843       self._WaitUntilSync()
6844
6845     self.feedback_fn("* done")
6846
6847   def _ExecFailover(self):
6848     """Failover an instance.
6849
6850     The failover is done by shutting it down on its present node and
6851     starting it on the secondary.
6852
6853     """
6854     instance = self.instance
6855     primary_node = self.cfg.GetNodeInfo(instance.primary_node)
6856
6857     source_node = instance.primary_node
6858     target_node = self.target_node
6859
6860     if instance.admin_up:
6861       self.feedback_fn("* checking disk consistency between source and target")
6862       for dev in instance.disks:
6863         # for drbd, these are drbd over lvm
6864         if not _CheckDiskConsistency(self, dev, target_node, False):
6865           if not self.ignore_consistency:
6866             raise errors.OpExecError("Disk %s is degraded on target node,"
6867                                      " aborting failover" % dev.iv_name)
6868     else:
6869       self.feedback_fn("* not checking disk consistency as instance is not"
6870                        " running")
6871
6872     self.feedback_fn("* shutting down instance on source node")
6873     logging.info("Shutting down instance %s on node %s",
6874                  instance.name, source_node)
6875
6876     result = self.rpc.call_instance_shutdown(source_node, instance,
6877                                              self.shutdown_timeout)
6878     msg = result.fail_msg
6879     if msg:
6880       if self.ignore_consistency or primary_node.offline:
6881         self.lu.LogWarning("Could not shutdown instance %s on node %s,"
6882                            " proceeding anyway; please make sure node"
6883                            " %s is down; error details: %s",
6884                            instance.name, source_node, source_node, msg)
6885       else:
6886         raise errors.OpExecError("Could not shutdown instance %s on"
6887                                  " node %s: %s" %
6888                                  (instance.name, source_node, msg))
6889
6890     self.feedback_fn("* deactivating the instance's disks on source node")
6891     if not _ShutdownInstanceDisks(self, instance, ignore_primary=True):
6892       raise errors.OpExecError("Can't shut down the instance's disks.")
6893
6894     instance.primary_node = target_node
6895     # distribute new instance config to the other nodes
6896     self.cfg.Update(instance, self.feedback_fn)
6897
6898     # Only start the instance if it's marked as up
6899     if instance.admin_up:
6900       self.feedback_fn("* activating the instance's disks on target node")
6901       logging.info("Starting instance %s on node %s",
6902                    instance.name, target_node)
6903
6904       disks_ok, _ = _AssembleInstanceDisks(self, instance,
6905                                            ignore_secondaries=True)
6906       if not disks_ok:
6907         _ShutdownInstanceDisks(self, instance)
6908         raise errors.OpExecError("Can't activate the instance's disks")
6909
6910       self.feedback_fn("* starting the instance on the target node")
6911       result = self.rpc.call_instance_start(target_node, instance, None, None)
6912       msg = result.fail_msg
6913       if msg:
6914         _ShutdownInstanceDisks(self, instance)
6915         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6916                                  (instance.name, target_node, msg))
6917
6918   def Exec(self, feedback_fn):
6919     """Perform the migration.
6920
6921     """
6922     self.feedback_fn = feedback_fn
6923     self.source_node = self.instance.primary_node
6924
6925     # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
6926     if self.instance.disk_template in constants.DTS_INT_MIRROR:
6927       self.target_node = self.instance.secondary_nodes[0]
6928       # Otherwise self.target_node has been populated either
6929       # directly, or through an iallocator.
6930
6931     self.all_nodes = [self.source_node, self.target_node]
6932     self.nodes_ip = {
6933       self.source_node: self.cfg.GetNodeInfo(self.source_node).secondary_ip,
6934       self.target_node: self.cfg.GetNodeInfo(self.target_node).secondary_ip,
6935       }
6936
6937     if self.failover:
6938       feedback_fn("Failover instance %s" % self.instance.name)
6939       self._ExecFailover()
6940     else:
6941       feedback_fn("Migrating instance %s" % self.instance.name)
6942
6943       if self.cleanup:
6944         return self._ExecCleanup()
6945       else:
6946         return self._ExecMigration()
6947
6948
6949 def _CreateBlockDev(lu, node, instance, device, force_create,
6950                     info, force_open):
6951   """Create a tree of block devices on a given node.
6952
6953   If this device type has to be created on secondaries, create it and
6954   all its children.
6955
6956   If not, just recurse to children keeping the same 'force' value.
6957
6958   @param lu: the lu on whose behalf we execute
6959   @param node: the node on which to create the device
6960   @type instance: L{objects.Instance}
6961   @param instance: the instance which owns the device
6962   @type device: L{objects.Disk}
6963   @param device: the device to create
6964   @type force_create: boolean
6965   @param force_create: whether to force creation of this device; this
6966       will be change to True whenever we find a device which has
6967       CreateOnSecondary() attribute
6968   @param info: the extra 'metadata' we should attach to the device
6969       (this will be represented as a LVM tag)
6970   @type force_open: boolean
6971   @param force_open: this parameter will be passes to the
6972       L{backend.BlockdevCreate} function where it specifies
6973       whether we run on primary or not, and it affects both
6974       the child assembly and the device own Open() execution
6975
6976   """
6977   if device.CreateOnSecondary():
6978     force_create = True
6979
6980   if device.children:
6981     for child in device.children:
6982       _CreateBlockDev(lu, node, instance, child, force_create,
6983                       info, force_open)
6984
6985   if not force_create:
6986     return
6987
6988   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
6989
6990
6991 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
6992   """Create a single block device on a given node.
6993
6994   This will not recurse over children of the device, so they must be
6995   created in advance.
6996
6997   @param lu: the lu on whose behalf we execute
6998   @param node: the node on which to create the device
6999   @type instance: L{objects.Instance}
7000   @param instance: the instance which owns the device
7001   @type device: L{objects.Disk}
7002   @param device: the device to create
7003   @param info: the extra 'metadata' we should attach to the device
7004       (this will be represented as a LVM tag)
7005   @type force_open: boolean
7006   @param force_open: this parameter will be passes to the
7007       L{backend.BlockdevCreate} function where it specifies
7008       whether we run on primary or not, and it affects both
7009       the child assembly and the device own Open() execution
7010
7011   """
7012   lu.cfg.SetDiskID(device, node)
7013   result = lu.rpc.call_blockdev_create(node, device, device.size,
7014                                        instance.name, force_open, info)
7015   result.Raise("Can't create block device %s on"
7016                " node %s for instance %s" % (device, node, instance.name))
7017   if device.physical_id is None:
7018     device.physical_id = result.payload
7019
7020
7021 def _GenerateUniqueNames(lu, exts):
7022   """Generate a suitable LV name.
7023
7024   This will generate a logical volume name for the given instance.
7025
7026   """
7027   results = []
7028   for val in exts:
7029     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7030     results.append("%s%s" % (new_id, val))
7031   return results
7032
7033
def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
                         iv_name, p_minor, s_minor):
  """Build a DRBD8 disk object together with its two LV children.

  A port and a shared secret are allocated through the configuration.
  The first entries of C{vgnames}/C{names} describe the data volume
  (created with the requested size), the second entries the metadata
  volume (created with a fixed size of 128).

  @param lu: the lu on whose behalf we execute
  @param primary: first node of the DRBD logical ID
  @param secondary: second node of the DRBD logical ID
  @param size: size of the DRBD device and of its data volume
  @param vgnames: two volume group names (data, meta)
  @param names: two logical volume names (data, meta)
  @param iv_name: the iv_name to set on the DRBD device
  @param p_minor: DRBD minor stored for the primary node
  @param s_minor: DRBD minor stored for the secondary node
  @return: the L{objects.Disk} describing the DRBD device

  """
  assert len(vgnames) == len(names) == 2
  drbd_port = lu.cfg.AllocatePort()
  secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())

  data_lv = objects.Disk(dev_type=constants.LD_LV, size=size,
                         logical_id=(vgnames[0], names[0]))
  meta_lv = objects.Disk(dev_type=constants.LD_LV, size=128,
                         logical_id=(vgnames[1], names[1]))

  drbd_id = (primary, secondary, drbd_port, p_minor, s_minor, secret)
  return objects.Disk(dev_type=constants.LD_DRBD8, size=size,
                      logical_id=drbd_id,
                      children=[data_lv, meta_lv],
                      iv_name=iv_name)
7053
7054
def _GenerateDiskTemplate(lu, template_name,
                          instance_name, primary_node,
                          secondary_nodes, disk_info,
                          file_storage_dir, file_driver,
                          base_index, feedback_fn):
  """Build the list of disk objects for the given disk template.

  @param lu: the lu on whose behalf we execute
  @param template_name: the disk template to generate disks for
  @param instance_name: passed to the DRBD minor allocation
  @param primary_node: the primary node (used for DRBD disks)
  @param secondary_nodes: must contain exactly one node for DRBD8 and
      be empty for the plain, file, shared-file and block templates
  @param disk_info: one dictionary per disk, read through the
      C{constants.IDISK_*} keys
  @param file_storage_dir: directory used for file-based disk paths
  @param file_driver: driver stored in file-based logical IDs
  @param base_index: index of the first generated disk
  @param feedback_fn: function used to report the generated plain LVs
  @rtype: list
  @return: the generated L{objects.Disk} objects
  @raise errors.ProgrammerError: for an unknown template or a wrong
      number of secondary nodes

  """
  #TODO: compute space requirements

  default_vg = lu.cfg.GetVGName()
  num_disks = len(disk_info)
  disks = []

  if template_name == constants.DT_DISKLESS:
    pass

  elif template_name == constants.DT_PLAIN:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    lv_names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                         for i in range(num_disks)])
    for idx, disk in enumerate(disk_info):
      vg = disk.get(constants.IDISK_VG, default_vg)
      lv_name = lv_names[idx]
      feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, lv_name))
      disks.append(objects.Disk(dev_type=constants.LD_LV,
                                size=disk[constants.IDISK_SIZE],
                                logical_id=(vg, lv_name),
                                iv_name="disk/%d" % (idx + base_index),
                                mode=disk[constants.IDISK_MODE]))

  elif template_name == constants.DT_DRBD8:
    if len(secondary_nodes) != 1:
      raise errors.ProgrammerError("Wrong template configuration")
    remote_node = secondary_nodes[0]
    # two minors per disk: one for the primary, one for the secondary
    minors = lu.cfg.AllocateDRBDMinor(
      [primary_node, remote_node] * len(disk_info), instance_name)

    prefixes = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
                                         for i in range(num_disks)])
    for idx, disk in enumerate(disk_info):
      data_vg = disk.get(constants.IDISK_VG, default_vg)
      meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
      prefix = prefixes[idx]
      drbd_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
                                      disk[constants.IDISK_SIZE],
                                      [data_vg, meta_vg],
                                      [prefix + "_data", prefix + "_meta"],
                                      "disk/%d" % (idx + base_index),
                                      minors[idx * 2], minors[idx * 2 + 1])
      drbd_dev.mode = disk[constants.IDISK_MODE]
      disks.append(drbd_dev)

  elif template_name in (constants.DT_FILE, constants.DT_SHARED_FILE):
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    # both templates produce the same disk objects and differ only in
    # the storage check performed beforehand
    if template_name == constants.DT_FILE:
      opcodes.RequireFileStorage()
    else:
      opcodes.RequireSharedFileStorage()

    for idx, disk in enumerate(disk_info):
      disk_index = idx + base_index
      file_path = "%s/disk%d" % (file_storage_dir, disk_index)
      disks.append(objects.Disk(dev_type=constants.LD_FILE,
                                size=disk[constants.IDISK_SIZE],
                                iv_name="disk/%d" % disk_index,
                                logical_id=(file_driver, file_path),
                                mode=disk[constants.IDISK_MODE]))

  elif template_name == constants.DT_BLOCK:
    if len(secondary_nodes) != 0:
      raise errors.ProgrammerError("Wrong template configuration")

    for idx, disk in enumerate(disk_info):
      block_id = (constants.BLOCKDEV_DRIVER_MANUAL,
                  disk[constants.IDISK_ADOPT])
      disks.append(objects.Disk(dev_type=constants.LD_BLOCKDEV,
                                size=disk[constants.IDISK_SIZE],
                                logical_id=block_id,
                                iv_name="disk/%d" % (idx + base_index),
                                mode=disk[constants.IDISK_MODE]))

  else:
    raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)

  return disks
7159
7160
7161 def _GetInstanceInfoText(instance):
7162   """Compute that text that should be added to the disk's metadata.
7163
7164   """
7165   return "originstname+%s" % instance.name
7166
7167
7168 def _CalcEta(time_taken, written, total_size):
7169   """Calculates the ETA based on size written and total size.
7170
7171   @param time_taken: The time taken so far
7172   @param written: amount written so far
7173   @param total_size: The total size of data to be written
7174   @return: The remaining time in seconds
7175
7176   """
7177   avg_time = time_taken / float(written)
7178   return (total_size - written) * avg_time
7179
7180
def _WipeDisks(lu, instance):
  """Wipes instance disks.

  Disk synchronization is paused on the primary node, every disk is
  wiped chunk by chunk through the blockdev_wipe RPC, and finally the
  synchronization is resumed again, even if wiping failed.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should wipe
  @raise errors.OpExecError: presumably raised by the results' Raise()
      when pausing the sync or wiping a chunk fails (callers in this
      file catch this type for other RPC results)

  """
  node = instance.primary_node

  for device in instance.disks:
    lu.cfg.SetDiskID(device, node)

  logging.info("Pause sync of instance %s disks", instance.name)
  result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
  # Check the RPC itself before looking at the payload, like every other
  # RPC result handled in this file
  result.Raise("Failed to pause disk synchronization on node '%s'" % node)

  for idx, success in enumerate(result.payload):
    if not success:
      logging.warning("pause-sync of instance %s for disks %d failed",
                      instance.name, idx)

  try:
    for idx, device in enumerate(instance.disks):
      # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
      # MAX_WIPE_CHUNK at max
      wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
                            constants.MIN_WIPE_CHUNK_PERCENT)
      # we _must_ make this an int, otherwise rounding errors will
      # occur; never go below one unit, as a chunk size of zero would
      # keep the loop below from ever advancing
      wipe_chunk_size = max(1, int(wipe_chunk_size))

      lu.LogInfo("* Wiping disk %d", idx)
      logging.info("Wiping disk %d for instance %s, node %s using"
                   " chunk size %s", idx, instance.name, node, wipe_chunk_size)

      offset = 0
      size = device.size
      # zero makes the first wiped chunk always report progress
      last_output = 0
      start_time = time.time()

      while offset < size:
        wipe_size = min(wipe_chunk_size, size - offset)
        logging.debug("Wiping disk %d, offset %s, chunk %s",
                      idx, offset, wipe_size)
        result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
        result.Raise("Could not wipe disk %d at offset %d for size %d" %
                     (idx, offset, wipe_size))
        now = time.time()
        offset += wipe_size
        # report progress at most once a minute
        if now - last_output >= 60:
          eta = _CalcEta(now - start_time, offset, size)
          lu.LogInfo(" - done: %.1f%% ETA: %s" %
                     (offset / float(size) * 100, utils.FormatSeconds(eta)))
          last_output = now
  finally:
    logging.info("Resume sync of instance %s disks", instance.name)

    result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)

    if result.fail_msg:
      # Only warn: raising here would hide an error from the wipe itself
      lu.LogWarning("Failed to resume disk synchronization on node '%s',"
                    " please have a look at the status and troubleshoot"
                    " the issue: %s", node, result.fail_msg)
      logging.warning("resume-sync of instance %s failed on node %s: %s",
                      instance.name, node, result.fail_msg)
    else:
      for idx, success in enumerate(result.payload):
        if not success:
          lu.LogWarning("Resume sync of disk %d failed, please have a"
                        " look at the status and troubleshoot the issue", idx)
          logging.warning("resume-sync of instance %s for disks %d failed",
                          instance.name, idx)
7248
7249
def _CreateDisks(lu, instance, to_skip=None, target_node=None):
  """Create all disks for an instance.

  This abstracts away some work from AddInstance. For the file-based disk
  templates the storage directory is created first on the primary (or
  target) node; a failure to do so is reported through the C{Raise}
  method of the RPC result. Nothing is returned explicitly.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should create
  @type to_skip: list
  @param to_skip: list of disk indices to skip
  @type target_node: string
  @param target_node: if passed, the disks are created on this node only,
      instead of on all the nodes of the instance

  """
  info = _GetInstanceInfoText(instance)

  if target_node is not None:
    pnode = target_node
    all_nodes = [pnode]
  else:
    pnode = instance.primary_node
    all_nodes = instance.all_nodes

  file_templates = (constants.DT_FILE, constants.DT_SHARED_FILE)
  if instance.disk_template in file_templates:
    # the directory is derived from the path of the first disk
    storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    dir_result = lu.rpc.call_file_storage_dir_create(pnode, storage_dir)
    dir_result.Raise("Failed to create directory '%s' on"
                     " node %s" % (storage_dir, pnode))

  # Note: this needs to be kept in sync with adding of disks in
  # LUInstanceSetParams
  for idx, device in enumerate(instance.disks):
    if to_skip and idx in to_skip:
      continue

    logging.info("Creating volume %s for instance %s",
                 device.iv_name, instance.name)
    #HARDCODE
    for node in all_nodes:
      # both boolean flags of _CreateBlockDev are set only on the
      # primary (or target) node
      on_primary = (node == pnode)
      _CreateBlockDev(lu, node, instance, device, on_primary, info,
                      on_primary)
7293
7294
def _RemoveDisks(lu, instance, target_node=None):
  """Remove all disks for an instance.

  This abstracts away some work from `AddInstance()` and
  `RemoveInstance()`. Note that in case some of the devices couldn't
  be removed, the removal will continue with the other ones (compare
  with `_CreateDisks()`).

  For instances using the file disk template, the storage directory
  (derived from the path of the first disk) is removed as well.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit on whose behalf we execute
  @type instance: L{objects.Instance}
  @param instance: the instance whose disks we should remove
  @type target_node: string
  @param target_node: used to override the node on which to remove the disks
  @rtype: boolean
  @return: C{True} if everything was removed, C{False} if at least one
      removal failed (failures are logged as warnings)

  """
  logging.info("Removing block devices for instance %s", instance.name)

  all_result = True
  for device in instance.disks:
    if target_node:
      edata = [(target_node, device)]
    else:
      edata = device.ComputeNodeTree(instance.primary_node)
    for node, disk in edata:
      lu.cfg.SetDiskID(disk, node)
      msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
      if msg:
        lu.LogWarning("Could not remove block device %s on node %s,"
                      " continuing anyway: %s", device.iv_name, node, msg)
        all_result = False

  # Without any disk there is no path to derive the directory from, so
  # there is nothing to remove (and nothing to fail on)
  if instance.disk_template == constants.DT_FILE and instance.disks:
    file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
    tgt = target_node or instance.primary_node
    result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
    if result.fail_msg:
      # report the node the RPC was actually sent to
      lu.LogWarning("Could not remove directory '%s' on node %s: %s",
                    file_storage_dir, tgt, result.fail_msg)
      all_result = False

  return all_result
7342
7343
def _ComputeDiskSizePerVG(disk_template, disks):
  """Compute disk size requirements per volume group.

  @type disk_template: string
  @param disk_template: the disk template to compute the requirements for
  @type disks: list of dicts
  @param disks: the disk definitions; each one must provide the
      L{constants.IDISK_VG} and L{constants.IDISK_SIZE} keys
  @rtype: dict
  @return: for the plain and drbd templates, a mapping of volume group
      name to the total size required in it; an empty dict for the
      diskless, file and shared-file templates
  @raise errors.ProgrammerError: if the disk template is not known

  """
  def _compute(disks, payload):
    """Sum up the disk sizes (plus per-disk payload) for each VG.

    """
    vgs = {}
    for disk in disks:
      vg_name = disk[constants.IDISK_VG]
      # accumulate per volume group name, so that several disks in the
      # same volume group add up instead of overwriting each other
      vgs[vg_name] = vgs.get(vg_name, 0) + disk[constants.IDISK_SIZE] + payload

    return vgs

  # Required free disk space as a function of disk and swap space
  req_size_dict = {
    constants.DT_DISKLESS: {},
    constants.DT_PLAIN: _compute(disks, 0),
    # 128 MB are added for drbd metadata for each disk
    constants.DT_DRBD8: _compute(disks, 128),
    constants.DT_FILE: {},
    constants.DT_SHARED_FILE: {},
  }

  if disk_template not in req_size_dict:
    raise errors.ProgrammerError("Disk template '%s' size requirement"
                                 " is unknown" %  disk_template)

  return req_size_dict[disk_template]
7374
7375
def _ComputeDiskSize(disk_template, disks):
  """Compute the total disk size required for a disk template.

  @param disk_template: the disk template to compute the size for
  @param disks: the disk definitions, each one providing the
      L{constants.IDISK_SIZE} key
  @return: the summed-up size for the plain and drbd templates (the
      latter with 128 added per disk), zero for the shared-file and block
      templates and C{None} for the diskless and file templates
  @raise errors.ProgrammerError: if the disk template is not known

  """
  plain_total = sum(d[constants.IDISK_SIZE] for d in disks)
  # 128 MB are added for drbd metadata for each disk
  drbd_total = sum(d[constants.IDISK_SIZE] + 128 for d in disks)

  # Required free disk space as a function of disk and swap space
  required = {
    constants.DT_DISKLESS: None,
    constants.DT_PLAIN: plain_total,
    constants.DT_DRBD8: drbd_total,
    constants.DT_FILE: None,
    constants.DT_SHARED_FILE: 0,
    constants.DT_BLOCK: 0,
  }

  if disk_template in required:
    return required[disk_template]

  raise errors.ProgrammerError("Disk template '%s' size requirement"
                               " is unknown" % disk_template)
7396
7397
7398 def _FilterVmNodes(lu, nodenames):
7399   """Filters out non-vm_capable nodes from a list.
7400
7401   @type lu: L{LogicalUnit}
7402   @param lu: the logical unit for which we check
7403   @type nodenames: list
7404   @param nodenames: the list of nodes on which we should check
7405   @rtype: list
7406   @return: the list of vm-capable nodes
7407
7408   """
7409   vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
7410   return [name for name in nodenames if name not in vm_nodes]
7411
7412
def _CheckHVParams(lu, nodenames, hvname, hvparams):
  """Hypervisor parameter validation.

  This function abstracts the hypervisor parameter validation to be
  used in both instance create and instance modify. Only the vm-capable
  nodes are asked, and the results of offline nodes are ignored.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type hvname: string
  @param hvname: the name of the hypervisor we should use
  @type hvparams: dict
  @param hvparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  vm_nodes = _FilterVmNodes(lu, nodenames)
  validation = lu.rpc.call_hypervisor_validate_params(vm_nodes, hvname,
                                                      hvparams)
  for node in vm_nodes:
    node_result = validation[node]
    if not node_result.offline:
      node_result.Raise("Hypervisor parameter validation failed on node %s" %
                        node)
7439
7440
def _CheckOSParams(lu, required, nodenames, osname, osparams):
  """OS parameters validation.

  Only the vm-capable nodes are asked to validate the parameters. A node
  which reports an empty payload is logged as not having the OS.

  @type lu: L{LogicalUnit}
  @param lu: the logical unit for which we check
  @type required: boolean
  @param required: whether the validation should fail if the OS is not
      found
  @type nodenames: list
  @param nodenames: the list of nodes on which we should check
  @type osname: string
  @param osname: the name of the OS we should check
  @type osparams: dict
  @param osparams: the parameters which we need to check
  @raise errors.OpPrereqError: if the parameters are not valid

  """
  vm_nodes = _FilterVmNodes(lu, nodenames)
  checks = [constants.OS_VALIDATE_PARAMETERS]
  validation = lu.rpc.call_os_validate(required, vm_nodes, osname, checks,
                                       osparams)

  for node, node_result in validation.items():
    # we don't check for offline cases since this should be run only
    # against the master node and/or an instance's nodes
    node_result.Raise("OS Parameters validation failed on node %s" % node)
    if node_result.payload:
      continue
    lu.LogInfo("OS %s not found on node %s, validation skipped",
               osname, node)
7469
7470
7471 class LUInstanceCreate(LogicalUnit):
7472   """Create an instance.
7473
7474   """
7475   HPATH = "instance-add"
7476   HTYPE = constants.HTYPE_INSTANCE
7477   REQ_BGL = False
7478
7479   def CheckArguments(self):
7480     """Check arguments.
7481
7482     """
7483     # do not require name_check to ease forward/backward compatibility
7484     # for tools
7485     if self.op.no_install and self.op.start:
7486       self.LogInfo("No-installation mode selected, disabling startup")
7487       self.op.start = False
7488     # validate/normalize the instance name
7489     self.op.instance_name = \
7490       netutils.Hostname.GetNormalizedName(self.op.instance_name)
7491
7492     if self.op.ip_check and not self.op.name_check:
7493       # TODO: make the ip check more flexible and not depend on the name check
7494       raise errors.OpPrereqError("Cannot do IP address check without a name"
7495                                  " check", errors.ECODE_INVAL)
7496
7497     # check nics' parameter names
7498     for nic in self.op.nics:
7499       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
7500
7501     # check disks. parameter names and consistent adopt/no-adopt strategy
7502     has_adopt = has_no_adopt = False
7503     for disk in self.op.disks:
7504       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
7505       if constants.IDISK_ADOPT in disk:
7506         has_adopt = True
7507       else:
7508         has_no_adopt = True
7509     if has_adopt and has_no_adopt:
7510       raise errors.OpPrereqError("Either all disks are adopted or none is",
7511                                  errors.ECODE_INVAL)
7512     if has_adopt:
7513       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
7514         raise errors.OpPrereqError("Disk adoption is not supported for the"
7515                                    " '%s' disk template" %
7516                                    self.op.disk_template,
7517                                    errors.ECODE_INVAL)
7518       if self.op.iallocator is not None:
7519         raise errors.OpPrereqError("Disk adoption not allowed with an"
7520                                    " iallocator script", errors.ECODE_INVAL)
7521       if self.op.mode == constants.INSTANCE_IMPORT:
7522         raise errors.OpPrereqError("Disk adoption not allowed for"
7523                                    " instance import", errors.ECODE_INVAL)
7524     else:
7525       if self.op.disk_template in constants.DTS_MUST_ADOPT:
7526         raise errors.OpPrereqError("Disk template %s requires disk adoption,"
7527                                    " but no 'adopt' parameter given" %
7528                                    self.op.disk_template,
7529                                    errors.ECODE_INVAL)
7530
7531     self.adopt_disks = has_adopt
7532
7533     # instance name verification
7534     if self.op.name_check:
7535       self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
7536       self.op.instance_name = self.hostname1.name
7537       # used in CheckPrereq for ip ping check
7538       self.check_ip = self.hostname1.ip
7539     else:
7540       self.check_ip = None
7541
7542     # file storage checks
7543     if (self.op.file_driver and
7544         not self.op.file_driver in constants.FILE_DRIVER):
7545       raise errors.OpPrereqError("Invalid file driver name '%s'" %
7546                                  self.op.file_driver, errors.ECODE_INVAL)
7547
7548     if self.op.file_storage_dir and os.path.isabs(self.op.file_storage_dir):
7549       raise errors.OpPrereqError("File storage directory path not absolute",
7550                                  errors.ECODE_INVAL)
7551
7552     ### Node/iallocator related checks
7553     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
7554
7555     if self.op.pnode is not None:
7556       if self.op.disk_template in constants.DTS_INT_MIRROR:
7557         if self.op.snode is None:
7558           raise errors.OpPrereqError("The networked disk templates need"
7559                                      " a mirror node", errors.ECODE_INVAL)
7560       elif self.op.snode:
7561         self.LogWarning("Secondary node will be ignored on non-mirrored disk"
7562                         " template")
7563         self.op.snode = None
7564
7565     self._cds = _GetClusterDomainSecret()
7566
7567     if self.op.mode == constants.INSTANCE_IMPORT:
7568       # On import force_variant must be True, because if we forced it at
7569       # initial install, our only chance when importing it back is that it
7570       # works again!
7571       self.op.force_variant = True
7572
7573       if self.op.no_install:
7574         self.LogInfo("No-installation mode has no effect during import")
7575
7576     elif self.op.mode == constants.INSTANCE_CREATE:
7577       if self.op.os_type is None:
7578         raise errors.OpPrereqError("No guest OS specified",
7579                                    errors.ECODE_INVAL)
7580       if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
7581         raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
7582                                    " installation" % self.op.os_type,
7583                                    errors.ECODE_STATE)
7584       if self.op.disk_template is None:
7585         raise errors.OpPrereqError("No disk template specified",
7586                                    errors.ECODE_INVAL)
7587
7588     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
7589       # Check handshake to ensure both clusters have the same domain secret
7590       src_handshake = self.op.source_handshake
7591       if not src_handshake:
7592         raise errors.OpPrereqError("Missing source handshake",
7593                                    errors.ECODE_INVAL)
7594
7595       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
7596                                                            src_handshake)
7597       if errmsg:
7598         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
7599                                    errors.ECODE_INVAL)
7600
7601       # Load and check source CA
7602       self.source_x509_ca_pem = self.op.source_x509_ca
7603       if not self.source_x509_ca_pem:
7604         raise errors.OpPrereqError("Missing source X509 CA",
7605                                    errors.ECODE_INVAL)
7606
7607       try:
7608         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
7609                                                     self._cds)
7610       except OpenSSL.crypto.Error, err:
7611         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
7612                                    (err, ), errors.ECODE_INVAL)
7613
7614       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
7615       if errcode is not None:
7616         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
7617                                    errors.ECODE_INVAL)
7618
7619       self.source_x509_ca = cert
7620
7621       src_instance_name = self.op.source_instance_name
7622       if not src_instance_name:
7623         raise errors.OpPrereqError("Missing source instance name",
7624                                    errors.ECODE_INVAL)
7625
7626       self.source_instance_name = \
7627           netutils.GetHostname(name=src_instance_name).name
7628
7629     else:
7630       raise errors.OpPrereqError("Invalid instance creation mode %r" %
7631                                  self.op.mode, errors.ECODE_INVAL)
7632
7633   def ExpandNames(self):
7634     """ExpandNames for CreateInstance.
7635
7636     Figure out the right locks for instance creation.
7637
7638     """
7639     self.needed_locks = {}
7640
7641     instance_name = self.op.instance_name
7642     # this is just a preventive check, but someone might still add this
7643     # instance in the meantime, and creation will fail at lock-add time
7644     if instance_name in self.cfg.GetInstanceList():
7645       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
7646                                  instance_name, errors.ECODE_EXISTS)
7647
7648     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
7649
7650     if self.op.iallocator:
7651       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7652     else:
7653       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
7654       nodelist = [self.op.pnode]
7655       if self.op.snode is not None:
7656         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
7657         nodelist.append(self.op.snode)
7658       self.needed_locks[locking.LEVEL_NODE] = nodelist
7659
7660     # in case of import lock the source node too
7661     if self.op.mode == constants.INSTANCE_IMPORT:
7662       src_node = self.op.src_node
7663       src_path = self.op.src_path
7664
7665       if src_path is None:
7666         self.op.src_path = src_path = self.op.instance_name
7667
7668       if src_node is None:
7669         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
7670         self.op.src_node = None
7671         if os.path.isabs(src_path):
7672           raise errors.OpPrereqError("Importing an instance from an absolute"
7673                                      " path requires a source node option",
7674                                      errors.ECODE_INVAL)
7675       else:
7676         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
7677         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
7678           self.needed_locks[locking.LEVEL_NODE].append(src_node)
7679         if not os.path.isabs(src_path):
7680           self.op.src_path = src_path = \
7681             utils.PathJoin(constants.EXPORT_DIR, src_path)
7682
7683   def _RunAllocator(self):
7684     """Run the allocator based on input opcode.
7685
7686     """
7687     nics = [n.ToDict() for n in self.nics]
7688     ial = IAllocator(self.cfg, self.rpc,
7689                      mode=constants.IALLOCATOR_MODE_ALLOC,
7690                      name=self.op.instance_name,
7691                      disk_template=self.op.disk_template,
7692                      tags=[],
7693                      os=self.op.os_type,
7694                      vcpus=self.be_full[constants.BE_VCPUS],
7695                      mem_size=self.be_full[constants.BE_MEMORY],
7696                      disks=self.disks,
7697                      nics=nics,
7698                      hypervisor=self.op.hypervisor,
7699                      )
7700
7701     ial.Run(self.op.iallocator)
7702
7703     if not ial.success:
7704       raise errors.OpPrereqError("Can't compute nodes using"
7705                                  " iallocator '%s': %s" %
7706                                  (self.op.iallocator, ial.info),
7707                                  errors.ECODE_NORES)
7708     if len(ial.result) != ial.required_nodes:
7709       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7710                                  " of nodes (%s), required %s" %
7711                                  (self.op.iallocator, len(ial.result),
7712                                   ial.required_nodes), errors.ECODE_FAULT)
7713     self.op.pnode = ial.result[0]
7714     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7715                  self.op.instance_name, self.op.iallocator,
7716                  utils.CommaJoin(ial.result))
7717     if ial.required_nodes == 2:
7718       self.op.snode = ial.result[1]
7719
7720   def BuildHooksEnv(self):
7721     """Build hooks env.
7722
7723     This runs on master, primary and secondary nodes of the instance.
7724
7725     """
7726     env = {
7727       "ADD_MODE": self.op.mode,
7728       }
7729     if self.op.mode == constants.INSTANCE_IMPORT:
7730       env["SRC_NODE"] = self.op.src_node
7731       env["SRC_PATH"] = self.op.src_path
7732       env["SRC_IMAGES"] = self.src_images
7733
7734     env.update(_BuildInstanceHookEnv(
7735       name=self.op.instance_name,
7736       primary_node=self.op.pnode,
7737       secondary_nodes=self.secondaries,
7738       status=self.op.start,
7739       os_type=self.op.os_type,
7740       memory=self.be_full[constants.BE_MEMORY],
7741       vcpus=self.be_full[constants.BE_VCPUS],
7742       nics=_NICListToTuple(self, self.nics),
7743       disk_template=self.op.disk_template,
7744       disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
7745              for d in self.disks],
7746       bep=self.be_full,
7747       hvp=self.hv_full,
7748       hypervisor_name=self.op.hypervisor,
7749     ))
7750
7751     return env
7752
7753   def BuildHooksNodes(self):
7754     """Build hooks nodes.
7755
7756     """
7757     nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
7758     return nl, nl
7759
7760   def _ReadExportInfo(self):
7761     """Reads the export information from disk.
7762
7763     It will override the opcode source node and path with the actual
7764     information, if these two were not specified before.
7765
7766     @return: the export information
7767
7768     """
7769     assert self.op.mode == constants.INSTANCE_IMPORT
7770
7771     src_node = self.op.src_node
7772     src_path = self.op.src_path
7773
7774     if src_node is None:
7775       locked_nodes = self.acquired_locks[locking.LEVEL_NODE]
7776       exp_list = self.rpc.call_export_list(locked_nodes)
7777       found = False
7778       for node in exp_list:
7779         if exp_list[node].fail_msg:
7780           continue
7781         if src_path in exp_list[node].payload:
7782           found = True
7783           self.op.src_node = src_node = node
7784           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
7785                                                        src_path)
7786           break
7787       if not found:
7788         raise errors.OpPrereqError("No export found for relative path %s" %
7789                                     src_path, errors.ECODE_INVAL)
7790
7791     _CheckNodeOnline(self, src_node)
7792     result = self.rpc.call_export_info(src_node, src_path)
7793     result.Raise("No export or invalid export found in dir %s" % src_path)
7794
7795     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
7796     if not export_info.has_section(constants.INISECT_EXP):
7797       raise errors.ProgrammerError("Corrupted export config",
7798                                    errors.ECODE_ENVIRON)
7799
7800     ei_version = export_info.get(constants.INISECT_EXP, "version")
7801     if (int(ei_version) != constants.EXPORT_VERSION):
7802       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
7803                                  (ei_version, constants.EXPORT_VERSION),
7804                                  errors.ECODE_ENVIRON)
7805     return export_info
7806
7807   def _ReadExportParams(self, einfo):
7808     """Use export parameters as defaults.
7809
7810     In case the opcode doesn't specify (as in override) some instance
7811     parameters, then try to use them from the export information, if
7812     that declares them.
7813
7814     """
7815     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
7816
7817     if self.op.disk_template is None:
7818       if einfo.has_option(constants.INISECT_INS, "disk_template"):
7819         self.op.disk_template = einfo.get(constants.INISECT_INS,
7820                                           "disk_template")
7821       else:
7822         raise errors.OpPrereqError("No disk template specified and the export"
7823                                    " is missing the disk_template information",
7824                                    errors.ECODE_INVAL)
7825
7826     if not self.op.disks:
7827       if einfo.has_option(constants.INISECT_INS, "disk_count"):
7828         disks = []
7829         # TODO: import the disk iv_name too
7830         for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
7831           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
7832           disks.append({constants.IDISK_SIZE: disk_sz})
7833         self.op.disks = disks
7834       else:
7835         raise errors.OpPrereqError("No disk info specified and the export"
7836                                    " is missing the disk information",
7837                                    errors.ECODE_INVAL)
7838
7839     if (not self.op.nics and
7840         einfo.has_option(constants.INISECT_INS, "nic_count")):
7841       nics = []
7842       for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
7843         ndict = {}
7844         for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
7845           v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
7846           ndict[name] = v
7847         nics.append(ndict)
7848       self.op.nics = nics
7849
7850     if (self.op.hypervisor is None and
7851         einfo.has_option(constants.INISECT_INS, "hypervisor")):
7852       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
7853     if einfo.has_section(constants.INISECT_HYP):
7854       # use the export parameters but do not override the ones
7855       # specified by the user
7856       for name, value in einfo.items(constants.INISECT_HYP):
7857         if name not in self.op.hvparams:
7858           self.op.hvparams[name] = value
7859
7860     if einfo.has_section(constants.INISECT_BEP):
7861       # use the parameters, without overriding
7862       for name, value in einfo.items(constants.INISECT_BEP):
7863         if name not in self.op.beparams:
7864           self.op.beparams[name] = value
7865     else:
7866       # try to read the parameters old style, from the main section
7867       for name in constants.BES_PARAMETERS:
7868         if (name not in self.op.beparams and
7869             einfo.has_option(constants.INISECT_INS, name)):
7870           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
7871
7872     if einfo.has_section(constants.INISECT_OSP):
7873       # use the parameters, without overriding
7874       for name, value in einfo.items(constants.INISECT_OSP):
7875         if name not in self.op.osparams:
7876           self.op.osparams[name] = value
7877
7878   def _RevertToDefaults(self, cluster):
7879     """Revert the instance parameters to the default values.
7880
7881     """
7882     # hvparams
7883     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
7884     for name in self.op.hvparams.keys():
7885       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
7886         del self.op.hvparams[name]
7887     # beparams
7888     be_defs = cluster.SimpleFillBE({})
7889     for name in self.op.beparams.keys():
7890       if name in be_defs and be_defs[name] == self.op.beparams[name]:
7891         del self.op.beparams[name]
7892     # nic params
7893     nic_defs = cluster.SimpleFillNIC({})
7894     for nic in self.op.nics:
7895       for name in constants.NICS_PARAMETERS:
7896         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
7897           del nic[name]
7898     # osparams
7899     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
7900     for name in self.op.osparams.keys():
7901       if name in os_defs and os_defs[name] == self.op.osparams[name]:
7902         del self.op.osparams[name]
7903
7904   def CheckPrereq(self):
7905     """Check prerequisites.
7906
7907     """
7908     if self.op.mode == constants.INSTANCE_IMPORT:
7909       export_info = self._ReadExportInfo()
7910       self._ReadExportParams(export_info)
7911
7912     if (not self.cfg.GetVGName() and
7913         self.op.disk_template not in constants.DTS_NOT_LVM):
7914       raise errors.OpPrereqError("Cluster does not support lvm-based"
7915                                  " instances", errors.ECODE_STATE)
7916
7917     if self.op.hypervisor is None:
7918       self.op.hypervisor = self.cfg.GetHypervisorType()
7919
7920     cluster = self.cfg.GetClusterInfo()
7921     enabled_hvs = cluster.enabled_hypervisors
7922     if self.op.hypervisor not in enabled_hvs:
7923       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
7924                                  " cluster (%s)" % (self.op.hypervisor,
7925                                   ",".join(enabled_hvs)),
7926                                  errors.ECODE_STATE)
7927
7928     # check hypervisor parameter syntax (locally)
7929     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
7930     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
7931                                       self.op.hvparams)
7932     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
7933     hv_type.CheckParameterSyntax(filled_hvp)
7934     self.hv_full = filled_hvp
7935     # check that we don't specify global parameters on an instance
7936     _CheckGlobalHvParams(self.op.hvparams)
7937
7938     # fill and remember the beparams dict
7939     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
7940     self.be_full = cluster.SimpleFillBE(self.op.beparams)
7941
7942     # build os parameters
7943     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
7944
7945     # now that hvp/bep are in final format, let's reset to defaults,
7946     # if told to do so
7947     if self.op.identify_defaults:
7948       self._RevertToDefaults(cluster)
7949
7950     # NIC buildup
7951     self.nics = []
7952     for idx, nic in enumerate(self.op.nics):
7953       nic_mode_req = nic.get(constants.INIC_MODE, None)
7954       nic_mode = nic_mode_req
7955       if nic_mode is None:
7956         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
7957
7958       # in routed mode, for the first nic, the default ip is 'auto'
7959       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
7960         default_ip_mode = constants.VALUE_AUTO
7961       else:
7962         default_ip_mode = constants.VALUE_NONE
7963
7964       # ip validity checks
7965       ip = nic.get(constants.INIC_IP, default_ip_mode)
7966       if ip is None or ip.lower() == constants.VALUE_NONE:
7967         nic_ip = None
7968       elif ip.lower() == constants.VALUE_AUTO:
7969         if not self.op.name_check:
7970           raise errors.OpPrereqError("IP address set to auto but name checks"
7971                                      " have been skipped",
7972                                      errors.ECODE_INVAL)
7973         nic_ip = self.hostname1.ip
7974       else:
7975         if not netutils.IPAddress.IsValid(ip):
7976           raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
7977                                      errors.ECODE_INVAL)
7978         nic_ip = ip
7979
7980       # TODO: check the ip address for uniqueness
7981       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
7982         raise errors.OpPrereqError("Routed nic mode requires an ip address",
7983                                    errors.ECODE_INVAL)
7984
7985       # MAC address verification
7986       mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
7987       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
7988         mac = utils.NormalizeAndValidateMac(mac)
7989
7990         try:
7991           self.cfg.ReserveMAC(mac, self.proc.GetECId())
7992         except errors.ReservationError:
7993           raise errors.OpPrereqError("MAC address %s already in use"
7994                                      " in cluster" % mac,
7995                                      errors.ECODE_NOTUNIQUE)
7996
7997       #  Build nic parameters
7998       link = nic.get(constants.INIC_LINK, None)
7999       nicparams = {}
8000       if nic_mode_req:
8001         nicparams[constants.NIC_MODE] = nic_mode_req
8002       if link:
8003         nicparams[constants.NIC_LINK] = link
8004
8005       check_params = cluster.SimpleFillNIC(nicparams)
8006       objects.NIC.CheckParameterSyntax(check_params)
8007       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8008
8009     # disk checks/pre-build
8010     default_vg = self.cfg.GetVGName()
8011     self.disks = []
8012     for disk in self.op.disks:
8013       mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8014       if mode not in constants.DISK_ACCESS_SET:
8015         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8016                                    mode, errors.ECODE_INVAL)
8017       size = disk.get(constants.IDISK_SIZE, None)
8018       if size is None:
8019         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8020       try:
8021         size = int(size)
8022       except (TypeError, ValueError):
8023         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8024                                    errors.ECODE_INVAL)
8025
8026       data_vg = disk.get(constants.IDISK_VG, default_vg)
8027       new_disk = {
8028         constants.IDISK_SIZE: size,
8029         constants.IDISK_MODE: mode,
8030         constants.IDISK_VG: data_vg,
8031         constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8032         }
8033       if constants.IDISK_ADOPT in disk:
8034         new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8035       self.disks.append(new_disk)
8036
8037     if self.op.mode == constants.INSTANCE_IMPORT:
8038
8039       # Check that the new instance doesn't have less disks than the export
8040       instance_disks = len(self.disks)
8041       export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
8042       if instance_disks < export_disks:
8043         raise errors.OpPrereqError("Not enough disks to import."
8044                                    " (instance: %d, export: %d)" %
8045                                    (instance_disks, export_disks),
8046                                    errors.ECODE_INVAL)
8047
8048       disk_images = []
8049       for idx in range(export_disks):
8050         option = 'disk%d_dump' % idx
8051         if export_info.has_option(constants.INISECT_INS, option):
8052           # FIXME: are the old os-es, disk sizes, etc. useful?
8053           export_name = export_info.get(constants.INISECT_INS, option)
8054           image = utils.PathJoin(self.op.src_path, export_name)
8055           disk_images.append(image)
8056         else:
8057           disk_images.append(False)
8058
8059       self.src_images = disk_images
8060
8061       old_name = export_info.get(constants.INISECT_INS, 'name')
8062       try:
8063         exp_nic_count = export_info.getint(constants.INISECT_INS, 'nic_count')
8064       except (TypeError, ValueError), err:
8065         raise errors.OpPrereqError("Invalid export file, nic_count is not"
8066                                    " an integer: %s" % str(err),
8067                                    errors.ECODE_STATE)
8068       if self.op.instance_name == old_name:
8069         for idx, nic in enumerate(self.nics):
8070           if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8071             nic_mac_ini = 'nic%d_mac' % idx
8072             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8073
8074     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8075
8076     # ip ping checks (we use the same ip that was resolved in ExpandNames)
8077     if self.op.ip_check:
8078       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8079         raise errors.OpPrereqError("IP %s of instance %s already in use" %
8080                                    (self.check_ip, self.op.instance_name),
8081                                    errors.ECODE_NOTUNIQUE)
8082
8083     #### mac address generation
8084     # By generating here the mac address both the allocator and the hooks get
8085     # the real final mac address rather than the 'auto' or 'generate' value.
8086     # There is a race condition between the generation and the instance object
8087     # creation, which means that we know the mac is valid now, but we're not
8088     # sure it will be when we actually add the instance. If things go bad
8089     # adding the instance will abort because of a duplicate mac, and the
8090     # creation job will fail.
8091     for nic in self.nics:
8092       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8093         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8094
8095     #### allocator run
8096
8097     if self.op.iallocator is not None:
8098       self._RunAllocator()
8099
8100     #### node related checks
8101
8102     # check primary node
8103     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8104     assert self.pnode is not None, \
8105       "Cannot retrieve locked node %s" % self.op.pnode
8106     if pnode.offline:
8107       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8108                                  pnode.name, errors.ECODE_STATE)
8109     if pnode.drained:
8110       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8111                                  pnode.name, errors.ECODE_STATE)
8112     if not pnode.vm_capable:
8113       raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8114                                  " '%s'" % pnode.name, errors.ECODE_STATE)
8115
8116     self.secondaries = []
8117
8118     # mirror node verification
8119     if self.op.disk_template in constants.DTS_INT_MIRROR:
8120       if self.op.snode == pnode.name:
8121         raise errors.OpPrereqError("The secondary node cannot be the"
8122                                    " primary node", errors.ECODE_INVAL)
8123       _CheckNodeOnline(self, self.op.snode)
8124       _CheckNodeNotDrained(self, self.op.snode)
8125       _CheckNodeVmCapable(self, self.op.snode)
8126       self.secondaries.append(self.op.snode)
8127
8128     nodenames = [pnode.name] + self.secondaries
8129
8130     if not self.adopt_disks:
8131       # Check lv size requirements, if not adopting
8132       req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8133       _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8134
8135     elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8136       all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8137                                 disk[constants.IDISK_ADOPT])
8138                      for disk in self.disks])
8139       if len(all_lvs) != len(self.disks):
8140         raise errors.OpPrereqError("Duplicate volume names given for adoption",
8141                                    errors.ECODE_INVAL)
8142       for lv_name in all_lvs:
8143         try:
8144           # FIXME: lv_name here is "vg/lv" need to ensure that other calls
8145           # to ReserveLV uses the same syntax
8146           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8147         except errors.ReservationError:
8148           raise errors.OpPrereqError("LV named %s used by another instance" %
8149                                      lv_name, errors.ECODE_NOTUNIQUE)
8150
8151       vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8152       vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8153
8154       node_lvs = self.rpc.call_lv_list([pnode.name],
8155                                        vg_names.payload.keys())[pnode.name]
8156       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8157       node_lvs = node_lvs.payload
8158
8159       delta = all_lvs.difference(node_lvs.keys())
8160       if delta:
8161         raise errors.OpPrereqError("Missing logical volume(s): %s" %
8162                                    utils.CommaJoin(delta),
8163                                    errors.ECODE_INVAL)
8164       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8165       if online_lvs:
8166         raise errors.OpPrereqError("Online logical volumes found, cannot"
8167                                    " adopt: %s" % utils.CommaJoin(online_lvs),
8168                                    errors.ECODE_STATE)
8169       # update the size of disk based on what is found
8170       for dsk in self.disks:
8171         dsk[constants.IDISK_SIZE] = \
8172           int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8173                                         dsk[constants.IDISK_ADOPT])][0]))
8174
8175     elif self.op.disk_template == constants.DT_BLOCK:
8176       # Normalize and de-duplicate device paths
8177       all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8178                        for disk in self.disks])
8179       if len(all_disks) != len(self.disks):
8180         raise errors.OpPrereqError("Duplicate disk names given for adoption",
8181                                    errors.ECODE_INVAL)
8182       baddisks = [d for d in all_disks
8183                   if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8184       if baddisks:
8185         raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8186                                    " cannot be adopted" %
8187                                    (", ".join(baddisks),
8188                                     constants.ADOPTABLE_BLOCKDEV_ROOT),
8189                                    errors.ECODE_INVAL)
8190
8191       node_disks = self.rpc.call_bdev_sizes([pnode.name],
8192                                             list(all_disks))[pnode.name]
8193       node_disks.Raise("Cannot get block device information from node %s" %
8194                        pnode.name)
8195       node_disks = node_disks.payload
8196       delta = all_disks.difference(node_disks.keys())
8197       if delta:
8198         raise errors.OpPrereqError("Missing block device(s): %s" %
8199                                    utils.CommaJoin(delta),
8200                                    errors.ECODE_INVAL)
8201       for dsk in self.disks:
8202         dsk[constants.IDISK_SIZE] = \
8203           int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8204
8205     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8206
8207     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8208     # check OS parameters (remotely)
8209     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8210
8211     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8212
8213     # memory check on primary node
8214     if self.op.start:
8215       _CheckNodeFreeMemory(self, self.pnode.name,
8216                            "creating instance %s" % self.op.instance_name,
8217                            self.be_full[constants.BE_MEMORY],
8218                            self.op.hypervisor)
8219
8220     self.dry_run_result = list(nodenames)
8221
  def Exec(self, feedback_fn):
    """Create and add the instance to the cluster.

    The steps are, in order: allocate a network port if the hypervisor
    needs one, compute the file storage directory, generate the disk
    objects, create the disks (or, when adopting plain LVs, rename the
    existing volumes), add the instance to the configuration, release
    the node locks, optionally wipe the disks, wait for/check the disk
    sync status, run the OS create or import scripts and finally start
    the instance if requested.

    @param feedback_fn: function called with progress messages
    @rtype: list
    @return: the contents of C{iobj.all_nodes} for the new instance
    @raise errors.OpExecError: re-raised if disk creation fails, or
        raised if the disks are found degraded after creation
    @raise errors.ProgrammerError: if C{self.op.mode} is not one of the
        creation modes handled below

    """
    instance = self.op.instance_name
    pnode_name = self.pnode.name

    # Only hypervisors listed in HTS_REQ_PORT get a port allocated
    ht_kind = self.op.hypervisor
    if ht_kind in constants.HTS_REQ_PORT:
      network_port = self.cfg.AllocatePort()
    else:
      network_port = None

    if constants.ENABLE_FILE_STORAGE or constants.ENABLE_SHARED_FILE_STORAGE:
      # this is needed because os.path.join does not accept None arguments
      if self.op.file_storage_dir is None:
        string_file_storage_dir = ""
      else:
        string_file_storage_dir = self.op.file_storage_dir

      # build the full file storage dir path
      if self.op.disk_template == constants.DT_SHARED_FILE:
        get_fsd_fn = self.cfg.GetSharedFileStorageDir
      else:
        get_fsd_fn = self.cfg.GetFileStorageDir

      file_storage_dir = utils.PathJoin(get_fsd_fn(),
                                        string_file_storage_dir, instance)
    else:
      file_storage_dir = ""

    # Build the disk objects for the requested template; the literal 0 is
    # passed positionally (NOTE(review): presumably the base disk index,
    # confirm against _GenerateDiskTemplate)
    disks = _GenerateDiskTemplate(self,
                                  self.op.disk_template,
                                  instance, pnode_name,
                                  self.secondaries,
                                  self.disks,
                                  file_storage_dir,
                                  self.op.file_driver,
                                  0,
                                  feedback_fn)

    # The instance object starts with admin_up=False; it is only flipped to
    # True at the very end, if a start was requested
    iobj = objects.Instance(name=instance, os=self.op.os_type,
                            primary_node=pnode_name,
                            nics=self.nics, disks=disks,
                            disk_template=self.op.disk_template,
                            admin_up=False,
                            network_port=network_port,
                            beparams=self.op.beparams,
                            hvparams=self.op.hvparams,
                            hypervisor=self.op.hypervisor,
                            osparams=self.op.osparams,
                            )

    if self.adopt_disks:
      # Adoption: no disks are created here; only plain LVs need a rename,
      # other adoptable templates are left untouched in this step
      if self.op.disk_template == constants.DT_PLAIN:
        # rename LVs to the newly-generated names; we need to construct
        # 'fake' LV disks with the old data, plus the new unique_id
        tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
        rename_to = []
        for t_dsk, a_dsk in zip (tmp_disks, self.disks):
          # remember the generated (target) logical id, then point the
          # temporary disk at the adopted volume so it can be renamed
          rename_to.append(t_dsk.logical_id)
          t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
          self.cfg.SetDiskID(t_dsk, pnode_name)
        result = self.rpc.call_blockdev_rename(pnode_name,
                                               zip(tmp_disks, rename_to))
        result.Raise("Failed to rename adoped LVs")
    else:
      feedback_fn("* creating instance disks...")
      try:
        _CreateDisks(self, iobj)
      except errors.OpExecError:
        self.LogWarning("Device creation failed, reverting...")
        try:
          _RemoveDisks(self, iobj)
        finally:
          # Always release the DRBD minors, even if the removal itself
          # failed, and then propagate the error to the caller
          self.cfg.ReleaseDRBDMinors(instance)
          raise

    feedback_fn("adding instance %s to cluster config" % instance)

    self.cfg.AddInstance(iobj, self.proc.GetECId())

    # Declare that we don't want to remove the instance lock anymore, as we've
    # added the instance to the config
    del self.remove_locks[locking.LEVEL_INSTANCE]

    if self.op.mode == constants.INSTANCE_IMPORT:
      # Release unused nodes
      _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
    else:
      # Release all nodes
      _ReleaseLocks(self, locking.LEVEL_NODE)

    # Wiping only applies to freshly created disks and only when the cluster
    # has prealloc_wipe_disks set; a failed wipe is treated as a disk abort
    disk_abort = False
    if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
      feedback_fn("* wiping instance disks...")
      try:
        _WipeDisks(self, iobj)
      except errors.OpExecError, err:
        logging.exception("Wiping disks failed")
        self.LogWarning("Wiping instance disks failed (%s)", err)
        disk_abort = True

    if disk_abort:
      # Something is already wrong with the disks, don't do anything else
      pass
    elif self.op.wait_for_sync:
      disk_abort = not _WaitForSync(self, iobj)
    elif iobj.disk_template in constants.DTS_INT_MIRROR:
      # make sure the disks are not degraded (still sync-ing is ok)
      # NOTE(review): fixed 15s delay before the one-shot check; the
      # rationale for this value is not visible here
      time.sleep(15)
      feedback_fn("* checking mirrors status")
      disk_abort = not _WaitForSync(self, iobj, oneshot=True)
    else:
      # disk_abort is already false on this path; kept explicit
      disk_abort = False

    if disk_abort:
      # Roll back: drop the disks and the config entry added above
      _RemoveDisks(self, iobj)
      self.cfg.RemoveInstance(iobj.name)
      # Make sure the instance lock gets removed
      self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
      raise errors.OpExecError("There are some degraded disks for"
                               " this instance")

    # OS scripts are skipped for diskless instances and for adopted disks
    if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
      if self.op.mode == constants.INSTANCE_CREATE:
        if not self.op.no_install:
          feedback_fn("* running the instance OS create scripts...")
          # FIXME: pass debug option from opcode to backend
          result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
                                                 self.op.debug_level)
          result.Raise("Could not add os for instance %s"
                       " on node %s" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_IMPORT:
        feedback_fn("* running the instance OS import scripts...")

        transfers = []

        # One transfer per disk that has a dump in the export; entries of
        # self.src_images that are false have no image and are skipped
        for idx, image in enumerate(self.src_images):
          if not image:
            continue

          # FIXME: pass debug option from opcode to backend
          dt = masterd.instance.DiskTransfer("disk/%s" % idx,
                                             constants.IEIO_FILE, (image, ),
                                             constants.IEIO_SCRIPT,
                                             (iobj.disks[idx], idx),
                                             None)
          transfers.append(dt)

        import_result = \
          masterd.instance.TransferInstanceData(self, feedback_fn,
                                                self.op.src_node, pnode_name,
                                                self.pnode.secondary_ip,
                                                iobj, transfers)
        # A partially failed import only produces a warning
        if not compat.all(import_result):
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

      elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
        feedback_fn("* preparing remote import...")
        # The source cluster will stop the instance before attempting to make a
        # connection. In some cases stopping an instance can take a long time,
        # hence the shutdown timeout is added to the connection timeout.
        connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
                           self.op.source_shutdown_timeout)
        timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)

        assert iobj.primary_node == self.pnode.name
        disk_results = \
          masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
                                        self.source_x509_ca,
                                        self._cds, timeouts)
        if not compat.all(disk_results):
          # TODO: Should the instance still be started, even if some disks
          # failed to import (valid for local imports, too)?
          self.LogWarning("Some disks for instance %s on node %s were not"
                          " imported successfully" % (instance, pnode_name))

        # Run rename script on newly imported instance
        assert iobj.name == instance
        feedback_fn("Running rename script for %s" % instance)
        result = self.rpc.call_instance_run_rename(pnode_name, iobj,
                                                   self.source_instance_name,
                                                   self.op.debug_level)
        # A failing rename script is only reported, not treated as fatal
        if result.fail_msg:
          self.LogWarning("Failed to run rename script for %s on node"
                          " %s: %s" % (instance, pnode_name, result.fail_msg))

      else:
        # also checked in the prereq part
        raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
                                     % self.op.mode)

    if self.op.start:
      # Record the desired state in the configuration before actually
      # asking the node to start the instance
      iobj.admin_up = True
      self.cfg.Update(iobj, feedback_fn)
      logging.info("Starting instance %s on node %s", instance, pnode_name)
      feedback_fn("* starting instance...")
      result = self.rpc.call_instance_start(pnode_name, iobj, None, None)
      result.Raise("Could not start instance")

    return list(iobj.all_nodes)
8426
8427
class LUInstanceConsole(NoHooksLU):
  """Look up how to connect to an instance's console.

  Unlike most logical units this one does not open anything itself; it
  only hands back the console information computed by
  L{_GetInstanceConsole}, which the caller then uses to connect.

  """
  REQ_BGL = False

  def ExpandNames(self):
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    The instance must exist in the configuration and its primary node
    must be online.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

  def Exec(self, feedback_fn):
    """Return the console information of a running instance.

    @raise errors.OpExecError: if the primary node does not list the
        instance among its running instances

    """
    inst = self.instance
    pnode = inst.primary_node

    # Ask the primary node which instances of this hypervisor are running
    running = self.rpc.call_instance_list([pnode], [inst.hypervisor])[pnode]
    running.Raise("Can't get node information from %s" % pnode)

    if inst.name in running.payload:
      logging.debug("Connecting to console of %s on %s", inst.name, pnode)
      return _GetInstanceConsole(self.cfg.GetClusterInfo(), inst)

    # Not running: report whether it was supposed to be up or not
    state = constants.INSTST_ADMINDOWN
    if inst.admin_up:
      state = constants.INSTST_ERRORDOWN
    raise errors.OpExecError("Instance %s is not running (state %s)" %
                             (inst.name, state))
8474
8475
def _GetInstanceConsole(cluster, instance):
  """Builds the console description of an instance.

  @type cluster: L{objects.Cluster}
  @param cluster: cluster object used to fill in the instance's
      hypervisor and backend parameters
  @type instance: L{objects.Instance}
  @param instance: the instance whose console is requested
  @rtype: dict
  @return: the console object, converted via its C{ToDict} method

  """
  hv_impl = hypervisor.GetHypervisor(instance.hypervisor)

  # The filled parameter dicts are handed over next to the instance
  # instead of being written into it, so that cluster defaults never end
  # up saved as instance-level values.
  filled_hv = cluster.FillHV(instance)
  filled_be = cluster.FillBE(instance)

  cons = hv_impl.GetInstanceConsole(instance, filled_hv, filled_be)

  # Sanity checks on what the hypervisor returned
  assert cons.instance == instance.name
  assert cons.Validate()

  return cons.ToDict()
8495
8496
class LUInstanceReplaceDisks(LogicalUnit):
  """Logical unit for replacing the disks of an instance.

  Argument checking, locking and hooks are handled here; the
  replacement itself is delegated to a L{TLReplaceDisks} tasklet.

  """
  HPATH = "mirrors-replace"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def CheckArguments(self):
    """Validate the mode/remote node/iallocator combination.

    """
    op = self.op
    TLReplaceDisks.CheckArguments(op.mode, op.remote_node, op.iallocator)

  def ExpandNames(self):
    """Declare the needed locks and set up the replacement tasklet.

    """
    self._ExpandAndLockInstance()

    op = self.op
    node_level = locking.LEVEL_NODE

    if op.iallocator is not None:
      # The allocator may pick any node, hence lock all of them
      self.needed_locks[node_level] = locking.ALL_SET

    elif op.remote_node is not None:
      new_secondary = _ExpandNodeName(self.cfg, op.remote_node)
      op.remote_node = new_secondary

      # Warning: do not remove the locking of the new secondary here
      # unless DRBD8.AddChildren is changed to work in parallel;
      # currently it doesn't since parallel invocations of
      # FindUnusedMinor will conflict
      self.needed_locks[node_level] = [new_secondary]
      self.recalculate_locks[node_level] = constants.LOCKS_APPEND

    else:
      # Only the instance's own nodes are needed; they are computed in
      # DeclareLocks
      self.needed_locks[node_level] = []
      self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

    # The literal False is the tasklet's delay_iallocator argument
    self.replacer = TLReplaceDisks(self, op.instance_name, op.mode,
                                   op.iallocator, op.remote_node,
                                   op.disks, False, op.early_release)

    self.tasklets = [self.replacer]

  def DeclareLocks(self, level):
    """Add the instance's nodes to the node locks, unless all are locked.

    """
    if level != locking.LEVEL_NODE:
      return
    if self.needed_locks[locking.LEVEL_NODE] is locking.ALL_SET:
      # Every node is already going to be locked, nothing to add
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The replace mode and the new/old secondary are exported first; the
    generic per-instance variables are merged in afterwards.

    """
    inst = self.replacer.instance
    hook_env = {
      "MODE": self.op.mode,
      "NEW_SECONDARY": self.op.remote_node,
      "OLD_SECONDARY": inst.secondary_nodes[0],
      }
    hook_env.update(_BuildInstanceHookEnvByObject(self, inst))
    return hook_env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    The list holds the master node, the instance's primary node and, if
    one was given, the new secondary node; the same list is returned
    twice.

    """
    inst = self.replacer.instance
    new_secondary = self.op.remote_node

    hook_nodes = [self.cfg.GetMasterNode(), inst.primary_node]
    if new_secondary is not None:
      hook_nodes.append(new_secondary)
    return (hook_nodes, hook_nodes)
8570
8571
8572 class TLReplaceDisks(Tasklet):
8573   """Replaces disks for an instance.
8574
8575   Note: Locking is not within the scope of this class.
8576
8577   """
8578   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
8579                disks, delay_iallocator, early_release):
8580     """Initializes this class.
8581
8582     """
8583     Tasklet.__init__(self, lu)
8584
8585     # Parameters
8586     self.instance_name = instance_name
8587     self.mode = mode
8588     self.iallocator_name = iallocator_name
8589     self.remote_node = remote_node
8590     self.disks = disks
8591     self.delay_iallocator = delay_iallocator
8592     self.early_release = early_release
8593
8594     # Runtime data
8595     self.instance = None
8596     self.new_node = None
8597     self.target_node = None
8598     self.other_node = None
8599     self.remote_node_info = None
8600     self.node_secondary_ip = None
8601
8602   @staticmethod
8603   def CheckArguments(mode, remote_node, iallocator):
8604     """Helper function for users of this class.
8605
8606     """
8607     # check for valid parameter combination
8608     if mode == constants.REPLACE_DISK_CHG:
8609       if remote_node is None and iallocator is None:
8610         raise errors.OpPrereqError("When changing the secondary either an"
8611                                    " iallocator script must be used or the"
8612                                    " new node given", errors.ECODE_INVAL)
8613
8614       if remote_node is not None and iallocator is not None:
8615         raise errors.OpPrereqError("Give either the iallocator or the new"
8616                                    " secondary, not both", errors.ECODE_INVAL)
8617
8618     elif remote_node is not None or iallocator is not None:
8619       # Not replacing the secondary
8620       raise errors.OpPrereqError("The iallocator and new node options can"
8621                                  " only be used when changing the"
8622                                  " secondary node", errors.ECODE_INVAL)
8623
8624   @staticmethod
8625   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
8626     """Compute a new secondary node using an IAllocator.
8627
8628     """
8629     ial = IAllocator(lu.cfg, lu.rpc,
8630                      mode=constants.IALLOCATOR_MODE_RELOC,
8631                      name=instance_name,
8632                      relocate_from=relocate_from)
8633
8634     ial.Run(iallocator_name)
8635
8636     if not ial.success:
8637       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
8638                                  " %s" % (iallocator_name, ial.info),
8639                                  errors.ECODE_NORES)
8640
8641     if len(ial.result) != ial.required_nodes:
8642       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8643                                  " of nodes (%s), required %s" %
8644                                  (iallocator_name,
8645                                   len(ial.result), ial.required_nodes),
8646                                  errors.ECODE_FAULT)
8647
8648     remote_node_name = ial.result[0]
8649
8650     lu.LogInfo("Selected new secondary for instance '%s': %s",
8651                instance_name, remote_node_name)
8652
8653     return remote_node_name
8654
8655   def _FindFaultyDisks(self, node_name):
8656     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
8657                                     node_name, True)
8658
8659   def _CheckDisksActivated(self, instance):
8660     """Checks if the instance disks are activated.
8661
8662     @param instance: The instance to check disks
8663     @return: True if they are activated, False otherwise
8664
8665     """
8666     nodes = instance.all_nodes
8667
8668     for idx, dev in enumerate(instance.disks):
8669       for node in nodes:
8670         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
8671         self.cfg.SetDiskID(dev, node)
8672
8673         result = self.rpc.call_blockdev_find(node, dev)
8674
8675         if result.offline:
8676           continue
8677         elif result.fail_msg or not result.payload:
8678           return False
8679
8680     return True
8681
8682   def CheckPrereq(self):
8683     """Check prerequisites.
8684
8685     This checks that the instance is in the cluster.
8686
8687     """
8688     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
8689     assert instance is not None, \
8690       "Cannot retrieve locked instance %s" % self.instance_name
8691
8692     if instance.disk_template != constants.DT_DRBD8:
8693       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
8694                                  " instances", errors.ECODE_INVAL)
8695
8696     if len(instance.secondary_nodes) != 1:
8697       raise errors.OpPrereqError("The instance has a strange layout,"
8698                                  " expected one secondary but found %d" %
8699                                  len(instance.secondary_nodes),
8700                                  errors.ECODE_FAULT)
8701
8702     if not self.delay_iallocator:
8703       self._CheckPrereq2()
8704
8705   def _CheckPrereq2(self):
8706     """Check prerequisites, second part.
8707
8708     This function should always be part of CheckPrereq. It was separated and is
8709     now called from Exec because during node evacuation iallocator was only
8710     called with an unmodified cluster model, not taking planned changes into
8711     account.
8712
8713     """
8714     instance = self.instance
8715     secondary_node = instance.secondary_nodes[0]
8716
8717     if self.iallocator_name is None:
8718       remote_node = self.remote_node
8719     else:
8720       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
8721                                        instance.name, instance.secondary_nodes)
8722
8723     if remote_node is not None:
8724       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
8725       assert self.remote_node_info is not None, \
8726         "Cannot retrieve locked node %s" % remote_node
8727     else:
8728       self.remote_node_info = None
8729
8730     if remote_node == self.instance.primary_node:
8731       raise errors.OpPrereqError("The specified node is the primary node of"
8732                                  " the instance", errors.ECODE_INVAL)
8733
8734     if remote_node == secondary_node:
8735       raise errors.OpPrereqError("The specified node is already the"
8736                                  " secondary node of the instance",
8737                                  errors.ECODE_INVAL)
8738
8739     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
8740                                     constants.REPLACE_DISK_CHG):
8741       raise errors.OpPrereqError("Cannot specify disks to be replaced",
8742                                  errors.ECODE_INVAL)
8743
8744     if self.mode == constants.REPLACE_DISK_AUTO:
8745       if not self._CheckDisksActivated(instance):
8746         raise errors.OpPrereqError("Please run activate-disks on instance %s"
8747                                    " first" % self.instance_name,
8748                                    errors.ECODE_STATE)
8749       faulty_primary = self._FindFaultyDisks(instance.primary_node)
8750       faulty_secondary = self._FindFaultyDisks(secondary_node)
8751
8752       if faulty_primary and faulty_secondary:
8753         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
8754                                    " one node and can not be repaired"
8755                                    " automatically" % self.instance_name,
8756                                    errors.ECODE_STATE)
8757
8758       if faulty_primary:
8759         self.disks = faulty_primary
8760         self.target_node = instance.primary_node
8761         self.other_node = secondary_node
8762         check_nodes = [self.target_node, self.other_node]
8763       elif faulty_secondary:
8764         self.disks = faulty_secondary
8765         self.target_node = secondary_node
8766         self.other_node = instance.primary_node
8767         check_nodes = [self.target_node, self.other_node]
8768       else:
8769         self.disks = []
8770         check_nodes = []
8771
8772     else:
8773       # Non-automatic modes
8774       if self.mode == constants.REPLACE_DISK_PRI:
8775         self.target_node = instance.primary_node
8776         self.other_node = secondary_node
8777         check_nodes = [self.target_node, self.other_node]
8778
8779       elif self.mode == constants.REPLACE_DISK_SEC:
8780         self.target_node = secondary_node
8781         self.other_node = instance.primary_node
8782         check_nodes = [self.target_node, self.other_node]
8783
8784       elif self.mode == constants.REPLACE_DISK_CHG:
8785         self.new_node = remote_node
8786         self.other_node = instance.primary_node
8787         self.target_node = secondary_node
8788         check_nodes = [self.new_node, self.other_node]
8789
8790         _CheckNodeNotDrained(self.lu, remote_node)
8791         _CheckNodeVmCapable(self.lu, remote_node)
8792
8793         old_node_info = self.cfg.GetNodeInfo(secondary_node)
8794         assert old_node_info is not None
8795         if old_node_info.offline and not self.early_release:
8796           # doesn't make sense to delay the release
8797           self.early_release = True
8798           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
8799                           " early-release mode", secondary_node)
8800
8801       else:
8802         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
8803                                      self.mode)
8804
8805       # If not specified all disks should be replaced
8806       if not self.disks:
8807         self.disks = range(len(self.instance.disks))
8808
8809     for node in check_nodes:
8810       _CheckNodeOnline(self.lu, node)
8811
8812     touched_nodes = frozenset([self.new_node, self.other_node,
8813                                self.target_node])
8814
8815     if self.lu.needed_locks[locking.LEVEL_NODE] == locking.ALL_SET:
8816       # Release unneeded node locks
8817       _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
8818
8819     # Check whether disks are valid
8820     for disk_idx in self.disks:
8821       instance.FindDisk(disk_idx)
8822
8823     # Get secondary node IP addresses
8824     self.node_secondary_ip = \
8825       dict((node_name, self.cfg.GetNodeInfo(node_name).secondary_ip)
8826            for node_name in touched_nodes
8827            if node_name is not None)
8828
8829   def Exec(self, feedback_fn):
8830     """Execute disk replacement.
8831
8832     This dispatches the disk replacement to the appropriate handler.
8833
8834     """
8835     if self.delay_iallocator:
8836       self._CheckPrereq2()
8837
8838     if (self.lu.needed_locks[locking.LEVEL_NODE] == locking.ALL_SET and
8839         __debug__):
8840       # Verify owned locks before starting operation
8841       owned_locks = self.lu.context.glm.list_owned(locking.LEVEL_NODE)
8842       assert set(owned_locks) == set(self.node_secondary_ip), \
8843           "Not owning the correct locks: %s" % (owned_locks, )
8844
8845     if not self.disks:
8846       feedback_fn("No disks need replacement")
8847       return
8848
8849     feedback_fn("Replacing disk(s) %s for %s" %
8850                 (utils.CommaJoin(self.disks), self.instance.name))
8851
8852     activate_disks = (not self.instance.admin_up)
8853
8854     # Activate the instance disks if we're replacing them on a down instance
8855     if activate_disks:
8856       _StartInstanceDisks(self.lu, self.instance, True)
8857
8858     try:
8859       # Should we replace the secondary node?
8860       if self.new_node is not None:
8861         fn = self._ExecDrbd8Secondary
8862       else:
8863         fn = self._ExecDrbd8DiskOnly
8864
8865       result = fn(feedback_fn)
8866     finally:
8867       # Deactivate the instance disks if we're replacing them on a
8868       # down instance
8869       if activate_disks:
8870         _SafeShutdownInstanceDisks(self.lu, self.instance)
8871
8872     if __debug__:
8873       # Verify owned locks
8874       owned_locks = self.lu.context.glm.list_owned(locking.LEVEL_NODE)
8875       nodes = frozenset(self.node_secondary_ip)
8876       assert ((self.early_release and not owned_locks) or
8877               (not self.early_release and not (set(owned_locks) - nodes))), \
8878         ("Not owning the correct locks, early_release=%s, owned=%r,"
8879          " nodes=%r" % (self.early_release, owned_locks, nodes))
8880
8881     return result
8882
8883   def _CheckVolumeGroup(self, nodes):
8884     self.lu.LogInfo("Checking volume groups")
8885
8886     vgname = self.cfg.GetVGName()
8887
8888     # Make sure volume group exists on all involved nodes
8889     results = self.rpc.call_vg_list(nodes)
8890     if not results:
8891       raise errors.OpExecError("Can't list volume groups on the nodes")
8892
8893     for node in nodes:
8894       res = results[node]
8895       res.Raise("Error checking node %s" % node)
8896       if vgname not in res.payload:
8897         raise errors.OpExecError("Volume group '%s' not found on node %s" %
8898                                  (vgname, node))
8899
8900   def _CheckDisksExistence(self, nodes):
8901     # Check disk existence
8902     for idx, dev in enumerate(self.instance.disks):
8903       if idx not in self.disks:
8904         continue
8905
8906       for node in nodes:
8907         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
8908         self.cfg.SetDiskID(dev, node)
8909
8910         result = self.rpc.call_blockdev_find(node, dev)
8911
8912         msg = result.fail_msg
8913         if msg or not result.payload:
8914           if not msg:
8915             msg = "disk not found"
8916           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
8917                                    (idx, node, msg))
8918
8919   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
8920     for idx, dev in enumerate(self.instance.disks):
8921       if idx not in self.disks:
8922         continue
8923
8924       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
8925                       (idx, node_name))
8926
8927       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
8928                                    ldisk=ldisk):
8929         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
8930                                  " replace disks for instance %s" %
8931                                  (node_name, self.instance.name))
8932
8933   def _CreateNewStorage(self, node_name):
8934     iv_names = {}
8935
8936     for idx, dev in enumerate(self.instance.disks):
8937       if idx not in self.disks:
8938         continue
8939
8940       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
8941
8942       self.cfg.SetDiskID(dev, node_name)
8943
8944       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
8945       names = _GenerateUniqueNames(self.lu, lv_names)
8946
8947       vg_data = dev.children[0].logical_id[0]
8948       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
8949                              logical_id=(vg_data, names[0]))
8950       vg_meta = dev.children[1].logical_id[0]
8951       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
8952                              logical_id=(vg_meta, names[1]))
8953
8954       new_lvs = [lv_data, lv_meta]
8955       old_lvs = dev.children
8956       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
8957
8958       # we pass force_create=True to force the LVM creation
8959       for new_lv in new_lvs:
8960         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
8961                         _GetInstanceInfoText(self.instance), False)
8962
8963     return iv_names
8964
8965   def _CheckDevices(self, node_name, iv_names):
8966     for name, (dev, _, _) in iv_names.iteritems():
8967       self.cfg.SetDiskID(dev, node_name)
8968
8969       result = self.rpc.call_blockdev_find(node_name, dev)
8970
8971       msg = result.fail_msg
8972       if msg or not result.payload:
8973         if not msg:
8974           msg = "disk not found"
8975         raise errors.OpExecError("Can't find DRBD device %s: %s" %
8976                                  (name, msg))
8977
8978       if result.payload.is_degraded:
8979         raise errors.OpExecError("DRBD device %s is degraded!" % name)
8980
8981   def _RemoveOldStorage(self, node_name, iv_names):
8982     for name, (_, old_lvs, _) in iv_names.iteritems():
8983       self.lu.LogInfo("Remove logical volumes for %s" % name)
8984
8985       for lv in old_lvs:
8986         self.cfg.SetDiskID(lv, node_name)
8987
8988         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
8989         if msg:
8990           self.lu.LogWarning("Can't remove old LV: %s" % msg,
8991                              hint="remove unused LVs manually")
8992
8993   def _ExecDrbd8DiskOnly(self, feedback_fn):
8994     """Replace a disk on the primary or secondary for DRBD 8.
8995
8996     The algorithm for replace is quite complicated:
8997
8998       1. for each disk to be replaced:
8999
9000         1. create new LVs on the target node with unique names
9001         1. detach old LVs from the drbd device
9002         1. rename old LVs to name_replaced.<time_t>
9003         1. rename new LVs to old LVs
9004         1. attach the new LVs (with the old names now) to the drbd device
9005
9006       1. wait for sync across all devices
9007
9008       1. for each modified disk:
9009
9010         1. remove old LVs (which have the name name_replaces.<time_t>)
9011
9012     Failures are not very well handled.
9013
9014     """
9015     steps_total = 6
9016
9017     # Step: check device activation
9018     self.lu.LogStep(1, steps_total, "Check device existence")
9019     self._CheckDisksExistence([self.other_node, self.target_node])
9020     self._CheckVolumeGroup([self.target_node, self.other_node])
9021
9022     # Step: check other node consistency
9023     self.lu.LogStep(2, steps_total, "Check peer consistency")
9024     self._CheckDisksConsistency(self.other_node,
9025                                 self.other_node == self.instance.primary_node,
9026                                 False)
9027
9028     # Step: create new storage
9029     self.lu.LogStep(3, steps_total, "Allocate new storage")
9030     iv_names = self._CreateNewStorage(self.target_node)
9031
9032     # Step: for each lv, detach+rename*2+attach
9033     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9034     for dev, old_lvs, new_lvs in iv_names.itervalues():
9035       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9036
9037       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9038                                                      old_lvs)
9039       result.Raise("Can't detach drbd from local storage on node"
9040                    " %s for device %s" % (self.target_node, dev.iv_name))
9041       #dev.children = []
9042       #cfg.Update(instance)
9043
9044       # ok, we created the new LVs, so now we know we have the needed
9045       # storage; as such, we proceed on the target node to rename
9046       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9047       # using the assumption that logical_id == physical_id (which in
9048       # turn is the unique_id on that node)
9049
9050       # FIXME(iustin): use a better name for the replaced LVs
9051       temp_suffix = int(time.time())
9052       ren_fn = lambda d, suff: (d.physical_id[0],
9053                                 d.physical_id[1] + "_replaced-%s" % suff)
9054
9055       # Build the rename list based on what LVs exist on the node
9056       rename_old_to_new = []
9057       for to_ren in old_lvs:
9058         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9059         if not result.fail_msg and result.payload:
9060           # device exists
9061           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9062
9063       self.lu.LogInfo("Renaming the old LVs on the target node")
9064       result = self.rpc.call_blockdev_rename(self.target_node,
9065                                              rename_old_to_new)
9066       result.Raise("Can't rename old LVs on node %s" % self.target_node)
9067
9068       # Now we rename the new LVs to the old LVs
9069       self.lu.LogInfo("Renaming the new LVs on the target node")
9070       rename_new_to_old = [(new, old.physical_id)
9071                            for old, new in zip(old_lvs, new_lvs)]
9072       result = self.rpc.call_blockdev_rename(self.target_node,
9073                                              rename_new_to_old)
9074       result.Raise("Can't rename new LVs on node %s" % self.target_node)
9075
9076       for old, new in zip(old_lvs, new_lvs):
9077         new.logical_id = old.logical_id
9078         self.cfg.SetDiskID(new, self.target_node)
9079
9080       for disk in old_lvs:
9081         disk.logical_id = ren_fn(disk, temp_suffix)
9082         self.cfg.SetDiskID(disk, self.target_node)
9083
9084       # Now that the new lvs have the old name, we can add them to the device
9085       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9086       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9087                                                   new_lvs)
9088       msg = result.fail_msg
9089       if msg:
9090         for new_lv in new_lvs:
9091           msg2 = self.rpc.call_blockdev_remove(self.target_node,
9092                                                new_lv).fail_msg
9093           if msg2:
9094             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9095                                hint=("cleanup manually the unused logical"
9096                                      "volumes"))
9097         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9098
9099       dev.children = new_lvs
9100
9101       self.cfg.Update(self.instance, feedback_fn)
9102
9103     cstep = 5
9104     if self.early_release:
9105       self.lu.LogStep(cstep, steps_total, "Removing old storage")
9106       cstep += 1
9107       self._RemoveOldStorage(self.target_node, iv_names)
9108       # WARNING: we release both node locks here, do not do other RPCs
9109       # than WaitForSync to the primary node
9110       _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9111                     names=[self.target_node, self.other_node])
9112
9113     # Wait for sync
9114     # This can fail as the old devices are degraded and _WaitForSync
9115     # does a combined result over all disks, so we don't check its return value
9116     self.lu.LogStep(cstep, steps_total, "Sync devices")
9117     cstep += 1
9118     _WaitForSync(self.lu, self.instance)
9119
9120     # Check all devices manually
9121     self._CheckDevices(self.instance.primary_node, iv_names)
9122
9123     # Step: remove old storage
9124     if not self.early_release:
9125       self.lu.LogStep(cstep, steps_total, "Removing old storage")
9126       cstep += 1
9127       self._RemoveOldStorage(self.target_node, iv_names)
9128
9129   def _ExecDrbd8Secondary(self, feedback_fn):
9130     """Replace the secondary node for DRBD 8.
9131
9132     The algorithm for replace is quite complicated:
9133       - for all disks of the instance:
9134         - create new LVs on the new node with same names
9135         - shutdown the drbd device on the old secondary
9136         - disconnect the drbd network on the primary
9137         - create the drbd device on the new secondary
9138         - network attach the drbd on the primary, using an artifice:
9139           the drbd code for Attach() will connect to the network if it
9140           finds a device which is connected to the good local disks but
9141           not network enabled
9142       - wait for sync across all devices
9143       - remove all disks from the old secondary
9144
9145     Failures are not very well handled.
9146
9147     """
9148     steps_total = 6
9149
9150     # Step: check device activation
9151     self.lu.LogStep(1, steps_total, "Check device existence")
9152     self._CheckDisksExistence([self.instance.primary_node])
9153     self._CheckVolumeGroup([self.instance.primary_node])
9154
9155     # Step: check other node consistency
9156     self.lu.LogStep(2, steps_total, "Check peer consistency")
9157     self._CheckDisksConsistency(self.instance.primary_node, True, True)
9158
9159     # Step: create new storage
9160     self.lu.LogStep(3, steps_total, "Allocate new storage")
9161     for idx, dev in enumerate(self.instance.disks):
9162       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9163                       (self.new_node, idx))
9164       # we pass force_create=True to force LVM creation
9165       for new_lv in dev.children:
9166         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9167                         _GetInstanceInfoText(self.instance), False)
9168
9169     # Step 4: dbrd minors and drbd setups changes
9170     # after this, we must manually remove the drbd minors on both the
9171     # error and the success paths
9172     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9173     minors = self.cfg.AllocateDRBDMinor([self.new_node
9174                                          for dev in self.instance.disks],
9175                                         self.instance.name)
9176     logging.debug("Allocated minors %r", minors)
9177
9178     iv_names = {}
9179     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9180       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9181                       (self.new_node, idx))
9182       # create new devices on new_node; note that we create two IDs:
9183       # one without port, so the drbd will be activated without
9184       # networking information on the new node at this stage, and one
9185       # with network, for the latter activation in step 4
9186       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9187       if self.instance.primary_node == o_node1:
9188         p_minor = o_minor1
9189       else:
9190         assert self.instance.primary_node == o_node2, "Three-node instance?"
9191         p_minor = o_minor2
9192
9193       new_alone_id = (self.instance.primary_node, self.new_node, None,
9194                       p_minor, new_minor, o_secret)
9195       new_net_id = (self.instance.primary_node, self.new_node, o_port,
9196                     p_minor, new_minor, o_secret)
9197
9198       iv_names[idx] = (dev, dev.children, new_net_id)
9199       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9200                     new_net_id)
9201       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9202                               logical_id=new_alone_id,
9203                               children=dev.children,
9204                               size=dev.size)
9205       try:
9206         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9207                               _GetInstanceInfoText(self.instance), False)
9208       except errors.GenericError:
9209         self.cfg.ReleaseDRBDMinors(self.instance.name)
9210         raise
9211
9212     # We have new devices, shutdown the drbd on the old secondary
9213     for idx, dev in enumerate(self.instance.disks):
9214       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9215       self.cfg.SetDiskID(dev, self.target_node)
9216       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9217       if msg:
9218         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9219                            "node: %s" % (idx, msg),
9220                            hint=("Please cleanup this device manually as"
9221                                  " soon as possible"))
9222
9223     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9224     result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
9225                                                self.node_secondary_ip,
9226                                                self.instance.disks)\
9227                                               [self.instance.primary_node]
9228
9229     msg = result.fail_msg
9230     if msg:
9231       # detaches didn't succeed (unlikely)
9232       self.cfg.ReleaseDRBDMinors(self.instance.name)
9233       raise errors.OpExecError("Can't detach the disks from the network on"
9234                                " old node: %s" % (msg,))
9235
9236     # if we managed to detach at least one, we update all the disks of
9237     # the instance to point to the new secondary
9238     self.lu.LogInfo("Updating instance configuration")
9239     for dev, _, new_logical_id in iv_names.itervalues():
9240       dev.logical_id = new_logical_id
9241       self.cfg.SetDiskID(dev, self.instance.primary_node)
9242
9243     self.cfg.Update(self.instance, feedback_fn)
9244
9245     # and now perform the drbd attach
9246     self.lu.LogInfo("Attaching primary drbds to new secondary"
9247                     " (standalone => connected)")
9248     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9249                                             self.new_node],
9250                                            self.node_secondary_ip,
9251                                            self.instance.disks,
9252                                            self.instance.name,
9253                                            False)
9254     for to_node, to_result in result.items():
9255       msg = to_result.fail_msg
9256       if msg:
9257         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9258                            to_node, msg,
9259                            hint=("please do a gnt-instance info to see the"
9260                                  " status of disks"))
9261     cstep = 5
9262     if self.early_release:
9263       self.lu.LogStep(cstep, steps_total, "Removing old storage")
9264       cstep += 1
9265       self._RemoveOldStorage(self.target_node, iv_names)
9266       # WARNING: we release all node locks here, do not do other RPCs
9267       # than WaitForSync to the primary node
9268       _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9269                     names=[self.instance.primary_node,
9270                            self.target_node,
9271                            self.new_node])
9272
9273     # Wait for sync
9274     # This can fail as the old devices are degraded and _WaitForSync
9275     # does a combined result over all disks, so we don't check its return value
9276     self.lu.LogStep(cstep, steps_total, "Sync devices")
9277     cstep += 1
9278     _WaitForSync(self.lu, self.instance)
9279
9280     # Check all devices manually
9281     self._CheckDevices(self.instance.primary_node, iv_names)
9282
9283     # Step: remove old storage
9284     if not self.early_release:
9285       self.lu.LogStep(cstep, steps_total, "Removing old storage")
9286       self._RemoveOldStorage(self.target_node, iv_names)
9287
9288
9289 class LURepairNodeStorage(NoHooksLU):
9290   """Repairs the volume group on a node.
9291
9292   """
9293   REQ_BGL = False
9294
9295   def CheckArguments(self):
9296     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9297
9298     storage_type = self.op.storage_type
9299
9300     if (constants.SO_FIX_CONSISTENCY not in
9301         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
9302       raise errors.OpPrereqError("Storage units of type '%s' can not be"
9303                                  " repaired" % storage_type,
9304                                  errors.ECODE_INVAL)
9305
9306   def ExpandNames(self):
9307     self.needed_locks = {
9308       locking.LEVEL_NODE: [self.op.node_name],
9309       }
9310
9311   def _CheckFaultyDisks(self, instance, node_name):
9312     """Ensure faulty disks abort the opcode or at least warn."""
9313     try:
9314       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
9315                                   node_name, True):
9316         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
9317                                    " node '%s'" % (instance.name, node_name),
9318                                    errors.ECODE_STATE)
9319     except errors.OpPrereqError, err:
9320       if self.op.ignore_consistency:
9321         self.proc.LogWarning(str(err.args[0]))
9322       else:
9323         raise
9324
9325   def CheckPrereq(self):
9326     """Check prerequisites.
9327
9328     """
9329     # Check whether any instance on this node has faulty disks
9330     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
9331       if not inst.admin_up:
9332         continue
9333       check_nodes = set(inst.all_nodes)
9334       check_nodes.discard(self.op.node_name)
9335       for inst_node_name in check_nodes:
9336         self._CheckFaultyDisks(inst, inst_node_name)
9337
9338   def Exec(self, feedback_fn):
9339     feedback_fn("Repairing storage unit '%s' on %s ..." %
9340                 (self.op.name, self.op.node_name))
9341
9342     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
9343     result = self.rpc.call_storage_execute(self.op.node_name,
9344                                            self.op.storage_type, st_args,
9345                                            self.op.name,
9346                                            constants.SO_FIX_CONSISTENCY)
9347     result.Raise("Failed to repair storage unit '%s' on %s" %
9348                  (self.op.name, self.op.node_name))
9349
9350
class LUNodeEvacStrategy(NoHooksLU):
  """Computes the node evacuation strategy.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Validates the iallocator/remote node options of the opcode.

    """
    _CheckIAllocatorOrNode(self, "iallocator", "remote_node")

  def ExpandNames(self):
    """Expands the node names and declares the needed node locks.

    Without a remote node all node locks are requested; otherwise only the
    given nodes and the remote node are locked.

    """
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)
    self.needed_locks = locks = {}
    if self.op.remote_node is None:
      locks[locking.LEVEL_NODE] = locking.ALL_SET
    else:
      self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
      locks[locking.LEVEL_NODE] = self.op.nodes + [self.op.remote_node]

  def Exec(self, feedback_fn):
    """Computes the evacuation solution.

    @param feedback_fn: unused here
    @return: with a remote node, a list of C{[instance name, remote node]}
        pairs for the instances returned by L{_GetNodeSecondaryInstances}
        for the given nodes; otherwise the result of the iallocator run
    @raise errors.OpPrereqError: if the remote node is the primary node of
        one of these instances
    @raise errors.OpExecError: if the iallocator run is not successful

    """
    if self.op.remote_node is not None:
      instances = []
      for node in self.op.nodes:
        instances.extend(_GetNodeSecondaryInstances(self.cfg, node))
      result = []
      for i in instances:
        if i.primary_node == self.op.remote_node:
          raise errors.OpPrereqError("Node %s is the primary node of"
                                     " instance %s, cannot use it as"
                                     " secondary" %
                                     (self.op.remote_node, i.name),
                                     errors.ECODE_INVAL)
        result.append([i.name, self.op.remote_node])
    else:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=constants.IALLOCATOR_MODE_MEVAC,
                       evac_nodes=self.op.nodes)
      ial.Run(self.op.iallocator, validate=True)
      if not ial.success:
        # Execution errors carry only a message; no error code is passed,
        # like for all other OpExecError instances raised in this module
        raise errors.OpExecError("No valid evacuation solution: %s" % ial.info)
      result = ial.result
    return result
9393
9394
class LUInstanceGrowDisk(LogicalUnit):
  """Logical unit growing a single disk of an instance.

  """
  HPATH = "disk-grow"
  HTYPE = constants.HTYPE_INSTANCE
  REQ_BGL = False

  def ExpandNames(self):
    """Lock the instance; the node locks are filled in L{DeclareLocks}.

    """
    self._ExpandAndLockInstance()
    node_level = locking.LEVEL_NODE
    self.needed_locks[node_level] = []
    self.recalculate_locks[node_level] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare the node locks once the node level is reached.

    """
    if level != locking.LEVEL_NODE:
      return
    self._LockInstancesNodes()

  def BuildHooksEnv(self):
    """Build hooks env.

    The environment holds the disk and amount from the opcode, updated
    with the generic per-instance hook environment.

    This runs on the master, the primary and all the secondaries.

    """
    env = dict(DISK=self.op.disk, AMOUNT=self.op.amount)
    env.update(_BuildInstanceHookEnvByObject(self, self.instance))
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @return: the same node list (master node followed by all nodes of
        the instance) for both elements of the tuple

    """
    nodes = [self.cfg.GetMasterNode()]
    nodes.extend(self.instance.all_nodes)
    return (nodes, nodes)

  def CheckPrereq(self):
    """Check prerequisites.

    This checks that the instance is in the cluster, that all its nodes
    are online, that its disk template is growable and (except for the
    file-based templates) that the nodes have enough free disk space.

    """
    instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name

    all_nodes = list(instance.all_nodes)
    for node_name in all_nodes:
      _CheckNodeOnline(self, node_name)

    self.instance = instance

    template = instance.disk_template
    if template not in constants.DTS_GROWABLE:
      raise errors.OpPrereqError("Instance's disk layout does not support"
                                 " growing", errors.ECODE_INVAL)

    self.disk = instance.FindDisk(self.op.disk)

    if template in (constants.DT_FILE, constants.DT_SHARED_FILE):
      # TODO: check the free disk space for file, when that feature will be
      # supported
      return

    growth = self.disk.ComputeGrowth(self.op.amount)
    _CheckNodesFreeDiskPerVG(self, all_nodes, growth)

  def Exec(self, feedback_fn):
    """Execute disk grow.

    The disk is activated, grown on every node of the instance, the new
    size is recorded in the configuration and, if requested, the sync is
    waited for.

    """
    instance = self.instance
    disk = self.disk
    amount = self.op.amount

    (activated, _) = _AssembleInstanceDisks(self, instance, disks=[disk])
    if not activated:
      raise errors.OpExecError("Cannot activate block device to grow")

    for node_name in instance.all_nodes:
      self.cfg.SetDiskID(disk, node_name)
      grow_result = self.rpc.call_blockdev_grow(node_name, disk, amount)
      grow_result.Raise("Grow request failed to node %s" % node_name)

      # TODO: Rewrite code to work properly
      # DRBD goes into sync mode for a short amount of time after executing the
      # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
      # calling "resize" in sync mode fails. Sleeping for a short amount of
      # time is a work-around.
      time.sleep(5)

    disk.RecordGrow(amount)
    self.cfg.Update(instance, feedback_fn)

    if not self.op.wait_for_sync:
      if not instance.admin_up:
        self.proc.LogWarning("Not shutting down the disk even if the instance"
                             " is not supposed to be running because no wait"
                             " for sync mode was requested")
      return

    if not _WaitForSync(self, instance, disks=[disk]):
      self.proc.LogWarning("Disk sync-ing has not returned a good"
                           " status; please check the instance")
    if not instance.admin_up:
      # the disk was only activated for the grow operation
      _SafeShutdownInstanceDisks(self, instance, disks=[disk])
9496
9497
class LUInstanceQueryData(NoHooksLU):
  """Query runtime instance data.

  L{Exec} returns a dictionary keyed by instance name; each value is a
  dictionary with the configuration data of the instance and, unless
  C{self.op.static} is set, its run state and block device status.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Compute the wanted instance names and the needed locks.

    Locking is forced on when non-static data is requested. With
    locking, the share flag is set to 1 for every lock level and the
    node locks are recomputed in L{DeclareLocks}.

    """
    self.needed_locks = {}

    # Use locking if requested or when non-static information is wanted
    if not (self.op.static or self.op.use_locking):
      self.LogWarning("Non-static data requested, locks need to be acquired")
      self.op.use_locking = True

    if self.op.instances or not self.op.use_locking:
      # Expand instance names right here
      self.wanted_names = _GetWantedInstances(self, self.op.instances)
    else:
      # Will use acquired locks
      self.wanted_names = None

    if self.op.use_locking:
      # set once; nothing below modifies it
      self.share_locks = dict.fromkeys(locking.LEVELS, 1)

      if self.wanted_names is None:
        self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
      else:
        self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names

      self.needed_locks[locking.LEVEL_NODE] = []
      self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE

  def DeclareLocks(self, level):
    """Declare the node locks, but only when locking is in use.

    """
    if self.op.use_locking and level == locking.LEVEL_NODE:
      self._LockInstancesNodes()

  def CheckPrereq(self):
    """Check prerequisites.

    This only checks the optional instance list against the existing names.

    If no names were computed in L{ExpandNames}, the names of the
    acquired instance locks are used. The instance objects are stored in
    C{self.wanted_instances}.

    """
    if self.wanted_names is None:
      assert self.op.use_locking, "Locking was not used"
      self.wanted_names = self.acquired_locks[locking.LEVEL_INSTANCE]

    self.wanted_instances = [self.cfg.GetInstanceInfo(name)
                             for name in self.wanted_names]

  def _ComputeBlockdevStatus(self, node, instance_name, dev):
    """Returns the status of a block device

    @param node: the node to query; a false value means no status
    @param instance_name: instance name, only used in the error message
    @param dev: the disk object to look up
    @return: None for static queries, when no node is given, when the
        RPC result is marked offline or when the payload is None;
        otherwise the tuple (dev_path, major, minor, sync_percent,
        estimated_time, is_degraded, ldisk_status)

    """
    if self.op.static or not node:
      return None

    self.cfg.SetDiskID(dev, node)

    result = self.rpc.call_blockdev_find(node, dev)
    if result.offline:
      return None

    result.Raise("Can't compute disk status for %s" % instance_name)

    status = result.payload
    if status is None:
      return None

    return (status.dev_path, status.major, status.minor,
            status.sync_percent, status.estimated_time,
            status.is_degraded, status.ldisk_status)

  def _ComputeDiskStatus(self, instance, snode, dev):
    """Compute block device status.

    Works recursively over the children of the device.

    @param instance: the instance owning the device
    @param snode: the secondary node to query; for DRBD devices it is
        replaced by the node of the logical_id which is not the
        instance's primary node
    @param dev: the disk object
    @return: a dictionary describing the device, with the status on the
        primary ("pstatus") and secondary ("sstatus") node and the list
        of the children's dictionaries ("children")

    """
    if dev.dev_type in constants.LDS_DRBD:
      # we change the snode then (otherwise we use the one passed in)
      if dev.logical_id[0] == instance.primary_node:
        snode = dev.logical_id[1]
      else:
        snode = dev.logical_id[0]

    dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
                                              instance.name, dev)
    dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)

    if dev.children:
      # children are queried with the (possibly updated) secondary node
      dev_children = [self._ComputeDiskStatus(instance, snode, child)
                      for child in dev.children]
    else:
      dev_children = []

    return {
      "iv_name": dev.iv_name,
      "dev_type": dev.dev_type,
      "logical_id": dev.logical_id,
      "physical_id": dev.physical_id,
      "pstatus": dev_pstatus,
      "sstatus": dev_sstatus,
      "children": dev_children,
      "mode": dev.mode,
      "size": dev.size,
      }

  def Exec(self, feedback_fn):
    """Gather and return data

    @param feedback_fn: feedback function (not used here)
    @return: a dictionary mapping each wanted instance name to a
        dictionary of its data; "run_state" is None for static queries,
        otherwise "up" or "down"

    """
    result = {}

    cluster = self.cfg.GetClusterInfo()

    for instance in self.wanted_instances:
      if not self.op.static:
        remote_info = self.rpc.call_instance_info(instance.primary_node,
                                                  instance.name,
                                                  instance.hypervisor)
        remote_info.Raise("Error checking node %s" % instance.primary_node)
        remote_info = remote_info.payload
        # a payload containing a "state" entry is reported as "up"
        if remote_info and "state" in remote_info:
          remote_state = "up"
        else:
          remote_state = "down"
      else:
        remote_state = None
      if instance.admin_up:
        config_state = "up"
      else:
        config_state = "down"

      disks = [self._ComputeDiskStatus(instance, None, device)
               for device in instance.disks]

      result[instance.name] = {
        "name": instance.name,
        "config_state": config_state,
        "run_state": remote_state,
        "pnode": instance.primary_node,
        "snodes": instance.secondary_nodes,
        "os": instance.os,
        # this happens to be the same format used for hooks
        "nics": _NICListToTuple(self, instance.nics),
        "disk_template": instance.disk_template,
        "disks": disks,
        "hypervisor": instance.hypervisor,
        "network_port": instance.network_port,
        "hv_instance": instance.hvparams,
        "hv_actual": cluster.FillHV(instance, skip_globals=True),
        "be_instance": instance.beparams,
        "be_actual": cluster.FillBE(instance),
        "os_instance": instance.osparams,
        "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
        "serial_no": instance.serial_no,
        "mtime": instance.mtime,
        "ctime": instance.ctime,
        "uuid": instance.uuid,
        }

    return result
9657
9658
9659 class LUInstanceSetParams(LogicalUnit):
  """Modifies an instance's parameters.
9661
9662   """
9663   HPATH = "instance-modify"
9664   HTYPE = constants.HTYPE_INSTANCE
9665   REQ_BGL = False
9666
9667   def CheckArguments(self):
9668     if not (self.op.nics or self.op.disks or self.op.disk_template or
9669             self.op.hvparams or self.op.beparams or self.op.os_name):
9670       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
9671
9672     if self.op.hvparams:
9673       _CheckGlobalHvParams(self.op.hvparams)
9674
9675     # Disk validation
9676     disk_addremove = 0
9677     for disk_op, disk_dict in self.op.disks:
9678       utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
9679       if disk_op == constants.DDM_REMOVE:
9680         disk_addremove += 1
9681         continue
9682       elif disk_op == constants.DDM_ADD:
9683         disk_addremove += 1
9684       else:
9685         if not isinstance(disk_op, int):
9686           raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
9687         if not isinstance(disk_dict, dict):
9688           msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
9689           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9690
9691       if disk_op == constants.DDM_ADD:
9692         mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
9693         if mode not in constants.DISK_ACCESS_SET:
9694           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
9695                                      errors.ECODE_INVAL)
9696         size = disk_dict.get(constants.IDISK_SIZE, None)
9697         if size is None:
9698           raise errors.OpPrereqError("Required disk parameter size missing",
9699                                      errors.ECODE_INVAL)
9700         try:
9701           size = int(size)
9702         except (TypeError, ValueError), err:
9703           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
9704                                      str(err), errors.ECODE_INVAL)
9705         disk_dict[constants.IDISK_SIZE] = size
9706       else:
9707         # modification of disk
9708         if constants.IDISK_SIZE in disk_dict:
9709           raise errors.OpPrereqError("Disk size change not possible, use"
9710                                      " grow-disk", errors.ECODE_INVAL)
9711
9712     if disk_addremove > 1:
9713       raise errors.OpPrereqError("Only one disk add or remove operation"
9714                                  " supported at a time", errors.ECODE_INVAL)
9715
9716     if self.op.disks and self.op.disk_template is not None:
9717       raise errors.OpPrereqError("Disk template conversion and other disk"
9718                                  " changes not supported at the same time",
9719                                  errors.ECODE_INVAL)
9720
9721     if (self.op.disk_template and
9722         self.op.disk_template in constants.DTS_INT_MIRROR and
9723         self.op.remote_node is None):
9724       raise errors.OpPrereqError("Changing the disk template to a mirrored"
9725                                  " one requires specifying a secondary node",
9726                                  errors.ECODE_INVAL)
9727
9728     # NIC validation
9729     nic_addremove = 0
9730     for nic_op, nic_dict in self.op.nics:
9731       utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
9732       if nic_op == constants.DDM_REMOVE:
9733         nic_addremove += 1
9734         continue
9735       elif nic_op == constants.DDM_ADD:
9736         nic_addremove += 1
9737       else:
9738         if not isinstance(nic_op, int):
9739           raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
9740         if not isinstance(nic_dict, dict):
9741           msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
9742           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
9743
9744       # nic_dict should be a dict
9745       nic_ip = nic_dict.get(constants.INIC_IP, None)
9746       if nic_ip is not None:
9747         if nic_ip.lower() == constants.VALUE_NONE:
9748           nic_dict[constants.INIC_IP] = None
9749         else:
9750           if not netutils.IPAddress.IsValid(nic_ip):
9751             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
9752                                        errors.ECODE_INVAL)
9753
9754       nic_bridge = nic_dict.get('bridge', None)
9755       nic_link = nic_dict.get(constants.INIC_LINK, None)
9756       if nic_bridge and nic_link:
9757         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
9758                                    " at the same time", errors.ECODE_INVAL)
9759       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
9760         nic_dict['bridge'] = None
9761       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
9762         nic_dict[constants.INIC_LINK] = None
9763
9764       if nic_op == constants.DDM_ADD:
9765         nic_mac = nic_dict.get(constants.INIC_MAC, None)
9766         if nic_mac is None:
9767           nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
9768
9769       if constants.INIC_MAC in nic_dict:
9770         nic_mac = nic_dict[constants.INIC_MAC]
9771         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
9772           nic_mac = utils.NormalizeAndValidateMac(nic_mac)
9773
9774         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
9775           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
9776                                      " modifying an existing nic",
9777                                      errors.ECODE_INVAL)
9778
9779     if nic_addremove > 1:
9780       raise errors.OpPrereqError("Only one NIC add or remove operation"
9781                                  " supported at a time", errors.ECODE_INVAL)
9782
9783   def ExpandNames(self):
9784     self._ExpandAndLockInstance()
9785     self.needed_locks[locking.LEVEL_NODE] = []
9786     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9787
9788   def DeclareLocks(self, level):
9789     if level == locking.LEVEL_NODE:
9790       self._LockInstancesNodes()
9791       if self.op.disk_template and self.op.remote_node:
9792         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9793         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
9794
9795   def BuildHooksEnv(self):
9796     """Build hooks env.
9797
9798     This runs on the master, primary and secondaries.
9799
9800     """
9801     args = dict()
9802     if constants.BE_MEMORY in self.be_new:
9803       args['memory'] = self.be_new[constants.BE_MEMORY]
9804     if constants.BE_VCPUS in self.be_new:
9805       args['vcpus'] = self.be_new[constants.BE_VCPUS]
9806     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
9807     # information at all.
9808     if self.op.nics:
9809       args['nics'] = []
9810       nic_override = dict(self.op.nics)
9811       for idx, nic in enumerate(self.instance.nics):
9812         if idx in nic_override:
9813           this_nic_override = nic_override[idx]
9814         else:
9815           this_nic_override = {}
9816         if constants.INIC_IP in this_nic_override:
9817           ip = this_nic_override[constants.INIC_IP]
9818         else:
9819           ip = nic.ip
9820         if constants.INIC_MAC in this_nic_override:
9821           mac = this_nic_override[constants.INIC_MAC]
9822         else:
9823           mac = nic.mac
9824         if idx in self.nic_pnew:
9825           nicparams = self.nic_pnew[idx]
9826         else:
9827           nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
9828         mode = nicparams[constants.NIC_MODE]
9829         link = nicparams[constants.NIC_LINK]
9830         args['nics'].append((ip, mac, mode, link))
9831       if constants.DDM_ADD in nic_override:
9832         ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
9833         mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
9834         nicparams = self.nic_pnew[constants.DDM_ADD]
9835         mode = nicparams[constants.NIC_MODE]
9836         link = nicparams[constants.NIC_LINK]
9837         args['nics'].append((ip, mac, mode, link))
9838       elif constants.DDM_REMOVE in nic_override:
9839         del args['nics'][-1]
9840
9841     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
9842     if self.op.disk_template:
9843       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
9844
9845     return env
9846
9847   def BuildHooksNodes(self):
9848     """Build hooks nodes.
9849
9850     """
9851     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
9852     return (nl, nl)
9853
9854   def CheckPrereq(self):
9855     """Check prerequisites.
9856
9857     This only checks the instance list against the existing names.
9858
9859     """
9860     # checking the new params on the primary/secondary nodes
9861
9862     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9863     cluster = self.cluster = self.cfg.GetClusterInfo()
9864     assert self.instance is not None, \
9865       "Cannot retrieve locked instance %s" % self.op.instance_name
9866     pnode = instance.primary_node
9867     nodelist = list(instance.all_nodes)
9868
9869     # OS change
9870     if self.op.os_name and not self.op.force:
9871       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
9872                       self.op.force_variant)
9873       instance_os = self.op.os_name
9874     else:
9875       instance_os = instance.os
9876
9877     if self.op.disk_template:
9878       if instance.disk_template == self.op.disk_template:
9879         raise errors.OpPrereqError("Instance already has disk template %s" %
9880                                    instance.disk_template, errors.ECODE_INVAL)
9881
9882       if (instance.disk_template,
9883           self.op.disk_template) not in self._DISK_CONVERSIONS:
9884         raise errors.OpPrereqError("Unsupported disk template conversion from"
9885                                    " %s to %s" % (instance.disk_template,
9886                                                   self.op.disk_template),
9887                                    errors.ECODE_INVAL)
9888       _CheckInstanceDown(self, instance, "cannot change disk template")
9889       if self.op.disk_template in constants.DTS_INT_MIRROR:
9890         if self.op.remote_node == pnode:
9891           raise errors.OpPrereqError("Given new secondary node %s is the same"
9892                                      " as the primary node of the instance" %
9893                                      self.op.remote_node, errors.ECODE_STATE)
9894         _CheckNodeOnline(self, self.op.remote_node)
9895         _CheckNodeNotDrained(self, self.op.remote_node)
9896         # FIXME: here we assume that the old instance type is DT_PLAIN
9897         assert instance.disk_template == constants.DT_PLAIN
9898         disks = [{constants.IDISK_SIZE: d.size,
9899                   constants.IDISK_VG: d.logical_id[0]}
9900                  for d in instance.disks]
9901         required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
9902         _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
9903
9904     # hvparams processing
9905     if self.op.hvparams:
9906       hv_type = instance.hypervisor
9907       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
9908       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
9909       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
9910
9911       # local check
9912       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
9913       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
9914       self.hv_new = hv_new # the new actual values
9915       self.hv_inst = i_hvdict # the new dict (without defaults)
9916     else:
9917       self.hv_new = self.hv_inst = {}
9918
9919     # beparams processing
9920     if self.op.beparams:
9921       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
9922                                    use_none=True)
9923       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
9924       be_new = cluster.SimpleFillBE(i_bedict)
9925       self.be_new = be_new # the new actual values
9926       self.be_inst = i_bedict # the new dict (without defaults)
9927     else:
9928       self.be_new = self.be_inst = {}
9929
9930     # osparams processing
9931     if self.op.osparams:
9932       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
9933       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
9934       self.os_inst = i_osdict # the new dict (without defaults)
9935     else:
9936       self.os_inst = {}
9937
9938     self.warn = []
9939
9940     if constants.BE_MEMORY in self.op.beparams and not self.op.force:
9941       mem_check_list = [pnode]
9942       if be_new[constants.BE_AUTO_BALANCE]:
9943         # either we changed auto_balance to yes or it was from before
9944         mem_check_list.extend(instance.secondary_nodes)
9945       instance_info = self.rpc.call_instance_info(pnode, instance.name,
9946                                                   instance.hypervisor)
9947       nodeinfo = self.rpc.call_node_info(mem_check_list, None,
9948                                          instance.hypervisor)
9949       pninfo = nodeinfo[pnode]
9950       msg = pninfo.fail_msg
9951       if msg:
9952         # Assume the primary node is unreachable and go ahead
9953         self.warn.append("Can't get info from primary node %s: %s" %
9954                          (pnode,  msg))
9955       elif not isinstance(pninfo.payload.get('memory_free', None), int):
9956         self.warn.append("Node data from primary node %s doesn't contain"
9957                          " free memory information" % pnode)
9958       elif instance_info.fail_msg:
9959         self.warn.append("Can't get instance runtime information: %s" %
9960                         instance_info.fail_msg)
9961       else:
9962         if instance_info.payload:
9963           current_mem = int(instance_info.payload['memory'])
9964         else:
9965           # Assume instance not running
9966           # (there is a slight race condition here, but it's not very probable,
9967           # and we have no other way to check)
9968           current_mem = 0
9969         miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
9970                     pninfo.payload['memory_free'])
9971         if miss_mem > 0:
9972           raise errors.OpPrereqError("This change will prevent the instance"
9973                                      " from starting, due to %d MB of memory"
9974                                      " missing on its primary node" % miss_mem,
9975                                      errors.ECODE_NORES)
9976
9977       if be_new[constants.BE_AUTO_BALANCE]:
9978         for node, nres in nodeinfo.items():
9979           if node not in instance.secondary_nodes:
9980             continue
9981           msg = nres.fail_msg
9982           if msg:
9983             self.warn.append("Can't get info from secondary node %s: %s" %
9984                              (node, msg))
9985           elif not isinstance(nres.payload.get('memory_free', None), int):
9986             self.warn.append("Secondary node %s didn't return free"
9987                              " memory information" % node)
9988           elif be_new[constants.BE_MEMORY] > nres.payload['memory_free']:
9989             self.warn.append("Not enough memory to failover instance to"
9990                              " secondary node %s" % node)
9991
9992     # NIC processing
9993     self.nic_pnew = {}
9994     self.nic_pinst = {}
9995     for nic_op, nic_dict in self.op.nics:
9996       if nic_op == constants.DDM_REMOVE:
9997         if not instance.nics:
9998           raise errors.OpPrereqError("Instance has no NICs, cannot remove",
9999                                      errors.ECODE_INVAL)
10000         continue
10001       if nic_op != constants.DDM_ADD:
10002         # an existing nic
10003         if not instance.nics:
10004           raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10005                                      " no NICs" % nic_op,
10006                                      errors.ECODE_INVAL)
10007         if nic_op < 0 or nic_op >= len(instance.nics):
10008           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
10009                                      " are 0 to %d" %
10010                                      (nic_op, len(instance.nics) - 1),
10011                                      errors.ECODE_INVAL)
10012         old_nic_params = instance.nics[nic_op].nicparams
10013         old_nic_ip = instance.nics[nic_op].ip
10014       else:
10015         old_nic_params = {}
10016         old_nic_ip = None
10017
10018       update_params_dict = dict([(key, nic_dict[key])
10019                                  for key in constants.NICS_PARAMETERS
10020                                  if key in nic_dict])
10021
10022       if 'bridge' in nic_dict:
10023         update_params_dict[constants.NIC_LINK] = nic_dict['bridge']
10024
10025       new_nic_params = _GetUpdatedParams(old_nic_params,
10026                                          update_params_dict)
10027       utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
10028       new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
10029       objects.NIC.CheckParameterSyntax(new_filled_nic_params)
10030       self.nic_pinst[nic_op] = new_nic_params
10031       self.nic_pnew[nic_op] = new_filled_nic_params
10032       new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
10033
10034       if new_nic_mode == constants.NIC_MODE_BRIDGED:
10035         nic_bridge = new_filled_nic_params[constants.NIC_LINK]
10036         msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
10037         if msg:
10038           msg = "Error checking bridges on node %s: %s" % (pnode, msg)
10039           if self.op.force:
10040             self.warn.append(msg)
10041           else:
10042             raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
10043       if new_nic_mode == constants.NIC_MODE_ROUTED:
10044         if constants.INIC_IP in nic_dict:
10045           nic_ip = nic_dict[constants.INIC_IP]
10046         else:
10047           nic_ip = old_nic_ip
10048         if nic_ip is None:
10049           raise errors.OpPrereqError('Cannot set the nic ip to None'
10050                                      ' on a routed nic', errors.ECODE_INVAL)
10051       if constants.INIC_MAC in nic_dict:
10052         nic_mac = nic_dict[constants.INIC_MAC]
10053         if nic_mac is None:
10054           raise errors.OpPrereqError('Cannot set the nic mac to None',
10055                                      errors.ECODE_INVAL)
10056         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10057           # otherwise generate the mac
10058           nic_dict[constants.INIC_MAC] = \
10059             self.cfg.GenerateMAC(self.proc.GetECId())
10060         else:
10061           # or validate/reserve the current one
10062           try:
10063             self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
10064           except errors.ReservationError:
10065             raise errors.OpPrereqError("MAC address %s already in use"
10066                                        " in cluster" % nic_mac,
10067                                        errors.ECODE_NOTUNIQUE)
10068
10069     # DISK processing
10070     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
10071       raise errors.OpPrereqError("Disk operations not supported for"
10072                                  " diskless instances",
10073                                  errors.ECODE_INVAL)
10074     for disk_op, _ in self.op.disks:
10075       if disk_op == constants.DDM_REMOVE:
10076         if len(instance.disks) == 1:
10077           raise errors.OpPrereqError("Cannot remove the last disk of"
10078                                      " an instance", errors.ECODE_INVAL)
10079         _CheckInstanceDown(self, instance, "cannot remove disks")
10080
10081       if (disk_op == constants.DDM_ADD and
10082           len(instance.disks) >= constants.MAX_DISKS):
10083         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
10084                                    " add more" % constants.MAX_DISKS,
10085                                    errors.ECODE_STATE)
10086       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
10087         # an existing disk
10088         if disk_op < 0 or disk_op >= len(instance.disks):
10089           raise errors.OpPrereqError("Invalid disk index %s, valid values"
10090                                      " are 0 to %d" %
10091                                      (disk_op, len(instance.disks)),
10092                                      errors.ECODE_INVAL)
10093
10094     return
10095
10096   def _ConvertPlainToDrbd(self, feedback_fn):
10097     """Converts an instance from plain to drbd.
10098
10099     """
10100     feedback_fn("Converting template to drbd")
10101     instance = self.instance
10102     pnode = instance.primary_node
10103     snode = self.op.remote_node
10104
10105     # create a fake disk info for _GenerateDiskTemplate
10106     disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
10107                   constants.IDISK_VG: d.logical_id[0]}
10108                  for d in instance.disks]
10109     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
10110                                       instance.name, pnode, [snode],
10111                                       disk_info, None, None, 0, feedback_fn)
10112     info = _GetInstanceInfoText(instance)
10113     feedback_fn("Creating aditional volumes...")
10114     # first, create the missing data and meta devices
10115     for disk in new_disks:
10116       # unfortunately this is... not too nice
10117       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
10118                             info, True)
10119       for child in disk.children:
10120         _CreateSingleBlockDev(self, snode, instance, child, info, True)
10121     # at this stage, all new LVs have been created, we can rename the
10122     # old ones
10123     feedback_fn("Renaming original volumes...")
10124     rename_list = [(o, n.children[0].logical_id)
10125                    for (o, n) in zip(instance.disks, new_disks)]
10126     result = self.rpc.call_blockdev_rename(pnode, rename_list)
10127     result.Raise("Failed to rename original LVs")
10128
10129     feedback_fn("Initializing DRBD devices...")
10130     # all child devices are in place, we can now create the DRBD devices
10131     for disk in new_disks:
10132       for node in [pnode, snode]:
10133         f_create = node == pnode
10134         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
10135
10136     # at this point, the instance has been modified
10137     instance.disk_template = constants.DT_DRBD8
10138     instance.disks = new_disks
10139     self.cfg.Update(instance, feedback_fn)
10140
10141     # disks are created, waiting for sync
10142     disk_abort = not _WaitForSync(self, instance)
10143     if disk_abort:
10144       raise errors.OpExecError("There are some degraded disks for"
10145                                " this instance, please cleanup manually")
10146
10147   def _ConvertDrbdToPlain(self, feedback_fn):
10148     """Converts an instance from drbd to plain.
10149
10150     """
10151     instance = self.instance
10152     assert len(instance.secondary_nodes) == 1
10153     pnode = instance.primary_node
10154     snode = instance.secondary_nodes[0]
10155     feedback_fn("Converting template to plain")
10156
10157     old_disks = instance.disks
10158     new_disks = [d.children[0] for d in old_disks]
10159
10160     # copy over size and mode
10161     for parent, child in zip(old_disks, new_disks):
10162       child.size = parent.size
10163       child.mode = parent.mode
10164
10165     # update instance structure
10166     instance.disks = new_disks
10167     instance.disk_template = constants.DT_PLAIN
10168     self.cfg.Update(instance, feedback_fn)
10169
10170     feedback_fn("Removing volumes on the secondary node...")
10171     for disk in old_disks:
10172       self.cfg.SetDiskID(disk, snode)
10173       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
10174       if msg:
10175         self.LogWarning("Could not remove block device %s on node %s,"
10176                         " continuing anyway: %s", disk.iv_name, snode, msg)
10177
10178     feedback_fn("Removing unneeded volumes on the primary node...")
10179     for idx, disk in enumerate(old_disks):
10180       meta = disk.children[1]
10181       self.cfg.SetDiskID(meta, pnode)
10182       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
10183       if msg:
10184         self.LogWarning("Could not remove metadata for disk %d on node %s,"
10185                         " continuing anyway: %s", idx, pnode, msg)
10186
  def Exec(self, feedback_fn):
    """Modifies an instance.

    All parameters take effect only at the next restart of the instance.

    The requested changes are applied in this order: disk removals,
    additions and mode changes, disk template conversion, NIC changes,
    hypervisor parameters, backend parameters, OS name and OS parameters.
    The instance object is written to the configuration at the end (the
    template conversion helpers additionally update it on their own).

    @param feedback_fn: function used to send warnings and progress
        messages to the caller
    @rtype: list
    @return: list of (parameter, value) pairs describing the changes which
        were made; a change of the OS name is not listed
    @raise errors.OpExecError: if the instance disks cannot be shut down
        before a disk template conversion

    """
    # Process here the warnings from CheckPrereq, as we don't have a
    # feedback_fn there.
    for warn in self.warn:
      feedback_fn("WARNING: %s" % warn)

    result = []
    instance = self.instance
    # disk changes
    for disk_op, disk_dict in self.op.disks:
      if disk_op == constants.DDM_REMOVE:
        # remove the last disk
        device = instance.disks.pop()
        # after the pop, the list length equals the index of the removed disk
        device_idx = len(instance.disks)
        # remove the device on every node returned by ComputeNodeTree;
        # failures are only reported as warnings
        for node, disk in device.ComputeNodeTree(instance.primary_node):
          self.cfg.SetDiskID(disk, node)
          msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
          if msg:
            self.LogWarning("Could not remove disk/%d on node %s: %s,"
                            " continuing anyway", device_idx, node, msg)
        result.append(("disk/%d" % device_idx, "remove"))
      elif disk_op == constants.DDM_ADD:
        # add a new disk
        if instance.disk_template in (constants.DT_FILE,
                                        constants.DT_SHARED_FILE):
          # file-based templates: reuse the driver and the directory of the
          # first disk (this requires the instance to have at least one disk)
          file_driver, file_path = instance.disks[0].logical_id
          file_path = os.path.dirname(file_path)
        else:
          file_driver = file_path = None
        # index of the new disk, which is appended after the existing ones
        disk_idx_base = len(instance.disks)
        new_disk = _GenerateDiskTemplate(self,
                                         instance.disk_template,
                                         instance.name, instance.primary_node,
                                         instance.secondary_nodes,
                                         [disk_dict],
                                         file_path,
                                         file_driver,
                                         disk_idx_base, feedback_fn)[0]
        instance.disks.append(new_disk)
        info = _GetInstanceInfoText(instance)

        logging.info("Creating volume %s for instance %s",
                     new_disk.iv_name, instance.name)
        # Note: this needs to be kept in sync with _CreateDisks
        #HARDCODE
        for node in instance.all_nodes:
          # both flags passed to _CreateBlockDev are only true on the
          # primary node
          f_create = node == instance.primary_node
          try:
            _CreateBlockDev(self, node, instance, new_disk,
                            f_create, info, f_create)
          except errors.OpExecError, err:
            # a failed creation is only warned about; the disk stays in the
            # instance's disk list
            self.LogWarning("Failed to create volume %s (%s) on"
                            " node %s: %s",
                            new_disk.iv_name, new_disk, node, err)
        result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
                       (new_disk.size, new_disk.mode)))
      else:
        # change a given disk
        # (disk_op is the index of the disk; only its mode is modified)
        instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
        result.append(("disk.mode/%d" % disk_op,
                       disk_dict[constants.IDISK_MODE]))

    if self.op.disk_template:
      # the conversion is only attempted once the disks have been shut down
      r_shut = _ShutdownInstanceDisks(self, instance)
      if not r_shut:
        raise errors.OpExecError("Cannot shutdown instance disks, unable to"
                                 " proceed with disk template conversion")
      # the conversion function is looked up by (current, requested) template
      mode = (instance.disk_template, self.op.disk_template)
      try:
        # _DISK_CONVERSIONS stores plain functions, hence the explicit self
        self._DISK_CONVERSIONS[mode](self, feedback_fn)
      except:
        # release the DRBD minors held for this instance name, then let the
        # original exception propagate unchanged
        self.cfg.ReleaseDRBDMinors(instance.name)
        raise
      result.append(("disk_template", self.op.disk_template))

    # NIC changes
    for nic_op, nic_dict in self.op.nics:
      if nic_op == constants.DDM_REMOVE:
        # remove the last nic
        del instance.nics[-1]
        # after the deletion, the list length is the index of the removed NIC
        result.append(("nic.%d" % len(instance.nics), "remove"))
      elif nic_op == constants.DDM_ADD:
        # mac and bridge should be set, by now
        mac = nic_dict[constants.INIC_MAC]
        ip = nic_dict.get(constants.INIC_IP, None)
        # nic_pinst/nic_pnew are not computed in this method (presumably in
        # CheckPrereq); they are indexed by the NIC operation
        nicparams = self.nic_pinst[constants.DDM_ADD]
        new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
        instance.nics.append(new_nic)
        result.append(("nic.%d" % (len(instance.nics) - 1),
                       "add:mac=%s,ip=%s,mode=%s,link=%s" %
                       (new_nic.mac, new_nic.ip,
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
                        self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
                       )))
      else:
        # modify an existing NIC; nic_op is its index
        for key in (constants.INIC_MAC, constants.INIC_IP):
          if key in nic_dict:
            # the parameter name is used as attribute name of the NIC object
            setattr(instance.nics[nic_op], key, nic_dict[key])
        if nic_op in self.nic_pinst:
          instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
        # every submitted key is reported, not only the ones handled above
        for key, val in nic_dict.iteritems():
          result.append(("nic.%s/%d" % (key, nic_op), val))

    # hvparams changes
    if self.op.hvparams:
      # the stored dictionary is self.hv_inst, while the reported values are
      # the ones submitted in the opcode
      instance.hvparams = self.hv_inst
      for key, val in self.op.hvparams.iteritems():
        result.append(("hv/%s" % key, val))

    # beparams changes
    if self.op.beparams:
      instance.beparams = self.be_inst
      for key, val in self.op.beparams.iteritems():
        result.append(("be/%s" % key, val))

    # OS change
    # (no entry is added to the result list for it)
    if self.op.os_name:
      instance.os = self.op.os_name

    # osparams changes
    if self.op.osparams:
      instance.osparams = self.os_inst
      for key, val in self.op.osparams.iteritems():
        result.append(("os/%s" % key, val))

    # write all accumulated changes to the configuration
    self.cfg.Update(instance, feedback_fn)

    return result
10320
10321   _DISK_CONVERSIONS = {
10322     (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
10323     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
10324     }
10325
10326
class LUBackupQuery(NoHooksLU):
  """Lists the instance exports found on the cluster nodes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declares the node locks needed for the query.

    The node-level entry of C{share_locks} is set to 1. When the opcode
    names no nodes, all of them are locked.

    """
    self.needed_locks = {}
    self.share_locks[locking.LEVEL_NODE] = 1

    if self.op.nodes:
      wanted_nodes = _GetWantedNodes(self, self.op.nodes)
    else:
      wanted_nodes = locking.ALL_SET
    self.needed_locks[locking.LEVEL_NODE] = wanted_nodes

  def Exec(self, feedback_fn):
    """Compute the list of all the exported system images.

    The queried nodes are the ones whose locks were acquired; they are also
    stored in C{self.nodes}.

    @param feedback_fn: unused
    @rtype: dict
    @return: a dictionary mapping each node to the payload of its export
        list query (the instances exported on that node), or to C{False}
        if the query failed on that node

    """
    self.nodes = self.acquired_locks[locking.LEVEL_NODE]
    node_results = self.rpc.call_export_list(self.nodes)

    exports_by_node = {}
    for node_name in node_results:
      node_result = node_results[node_name]
      if not node_result.fail_msg:
        exports_by_node[node_name] = node_result.payload
      else:
        # failures are reported as False instead of a list
        exports_by_node[node_name] = False

    return exports_by_node
10361
10362
class LUBackupPrepare(NoHooksLU):
  """Gathers the information needed before an instance can be exported.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expands the instance name and locks the instance.

    """
    self._ExpandAndLockInstance()

  def CheckPrereq(self):
    """Check prerequisites.

    Loads the instance, requires its primary node to be online and reads
    the cluster domain secret into C{self._cds}.

    """
    self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
    assert self.instance is not None, \
      "Cannot retrieve locked instance %s" % self.op.instance_name
    _CheckNodeOnline(self, self.instance.primary_node)

    self._cds = _GetClusterDomainSecret()

  def Exec(self, feedback_fn):
    """Prepares an instance for an export.

    Work is only done for remote exports: an X509 key and certificate are
    created on the primary node of the instance, and the data a remote
    import needs is derived from them using the cluster domain secret.

    @param feedback_fn: function used to report progress to the caller
    @return: C{None} unless the mode is C{constants.EXPORT_MODE_REMOTE};
        otherwise a dictionary with the keys C{"handshake"},
        C{"x509_key_name"} (a tuple of key name, HMAC of the key name and
        salt) and C{"x509_ca"} (the signed certificate)

    """
    instance = self.instance

    if self.op.mode != constants.EXPORT_MODE_REMOTE:
      return None

    pnode = instance.primary_node
    salt = utils.GenerateSecret(8)

    feedback_fn("Generating X509 certificate on %s" % pnode)
    cert_result = self.rpc.call_x509_cert_create(pnode,
                                                 constants.RIE_CERT_VALIDITY)
    cert_result.Raise("Can't create X509 key and certificate on %s" %
                      cert_result.node)

    (key_name, cert_pem) = cert_result.payload
    cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
                                           cert_pem)

    # key name and certificate are authenticated with the cluster domain
    # secret, using the same salt for both
    handshake = masterd.instance.ComputeRemoteExportHandshake(self._cds)
    key_name_hmac = utils.Sha1Hmac(self._cds, key_name, salt=salt)
    signed_cert = utils.SignX509Certificate(cert, self._cds, salt)

    return {
      "handshake": handshake,
      "x509_key_name": (key_name, key_name_hmac, salt),
      "x509_ca": signed_cert,
      }
10412
10413
10414 class LUBackupExport(LogicalUnit):
10415   """Export an instance to an image in the cluster.
10416
10417   """
10418   HPATH = "instance-export"
10419   HTYPE = constants.HTYPE_INSTANCE
10420   REQ_BGL = False
10421
10422   def CheckArguments(self):
10423     """Check the arguments.
10424
10425     """
10426     self.x509_key_name = self.op.x509_key_name
10427     self.dest_x509_ca_pem = self.op.destination_x509_ca
10428
10429     if self.op.mode == constants.EXPORT_MODE_REMOTE:
10430       if not self.x509_key_name:
10431         raise errors.OpPrereqError("Missing X509 key name for encryption",
10432                                    errors.ECODE_INVAL)
10433
10434       if not self.dest_x509_ca_pem:
10435         raise errors.OpPrereqError("Missing destination X509 CA",
10436                                    errors.ECODE_INVAL)
10437
10438   def ExpandNames(self):
10439     self._ExpandAndLockInstance()
10440
10441     # Lock all nodes for local exports
10442     if self.op.mode == constants.EXPORT_MODE_LOCAL:
10443       # FIXME: lock only instance primary and destination node
10444       #
10445       # Sad but true, for now we have do lock all nodes, as we don't know where
10446       # the previous export might be, and in this LU we search for it and
10447       # remove it from its current node. In the future we could fix this by:
10448       #  - making a tasklet to search (share-lock all), then create the
10449       #    new one, then one to remove, after
10450       #  - removing the removal operation altogether
10451       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
10452
10453   def DeclareLocks(self, level):
10454     """Last minute lock declaration."""
10455     # All nodes are locked anyway, so nothing to do here.
10456
10457   def BuildHooksEnv(self):
10458     """Build hooks env.
10459
10460     This will run on the master, primary node and target node.
10461
10462     """
10463     env = {
10464       "EXPORT_MODE": self.op.mode,
10465       "EXPORT_NODE": self.op.target_node,
10466       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
10467       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
10468       # TODO: Generic function for boolean env variables
10469       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
10470       }
10471
10472     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10473
10474     return env
10475
10476   def BuildHooksNodes(self):
10477     """Build hooks nodes.
10478
10479     """
10480     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
10481
10482     if self.op.mode == constants.EXPORT_MODE_LOCAL:
10483       nl.append(self.op.target_node)
10484
10485     return (nl, nl)
10486
10487   def CheckPrereq(self):
10488     """Check prerequisites.
10489
10490     This checks that the instance and node names are valid.
10491
10492     """
10493     instance_name = self.op.instance_name
10494
10495     self.instance = self.cfg.GetInstanceInfo(instance_name)
10496     assert self.instance is not None, \
10497           "Cannot retrieve locked instance %s" % self.op.instance_name
10498     _CheckNodeOnline(self, self.instance.primary_node)
10499
10500     if (self.op.remove_instance and self.instance.admin_up and
10501         not self.op.shutdown):
10502       raise errors.OpPrereqError("Can not remove instance without shutting it"
10503                                  " down before")
10504
10505     if self.op.mode == constants.EXPORT_MODE_LOCAL:
10506       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
10507       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
10508       assert self.dst_node is not None
10509
10510       _CheckNodeOnline(self, self.dst_node.name)
10511       _CheckNodeNotDrained(self, self.dst_node.name)
10512
10513       self._cds = None
10514       self.dest_disk_info = None
10515       self.dest_x509_ca = None
10516
10517     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10518       self.dst_node = None
10519
10520       if len(self.op.target_node) != len(self.instance.disks):
10521         raise errors.OpPrereqError(("Received destination information for %s"
10522                                     " disks, but instance %s has %s disks") %
10523                                    (len(self.op.target_node), instance_name,
10524                                     len(self.instance.disks)),
10525                                    errors.ECODE_INVAL)
10526
10527       cds = _GetClusterDomainSecret()
10528
10529       # Check X509 key name
10530       try:
10531         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
10532       except (TypeError, ValueError), err:
10533         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
10534
10535       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
10536         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
10537                                    errors.ECODE_INVAL)
10538
10539       # Load and verify CA
10540       try:
10541         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
10542       except OpenSSL.crypto.Error, err:
10543         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
10544                                    (err, ), errors.ECODE_INVAL)
10545
10546       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
10547       if errcode is not None:
10548         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
10549                                    (msg, ), errors.ECODE_INVAL)
10550
10551       self.dest_x509_ca = cert
10552
10553       # Verify target information
10554       disk_info = []
10555       for idx, disk_data in enumerate(self.op.target_node):
10556         try:
10557           (host, port, magic) = \
10558             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
10559         except errors.GenericError, err:
10560           raise errors.OpPrereqError("Target info for disk %s: %s" %
10561                                      (idx, err), errors.ECODE_INVAL)
10562
10563         disk_info.append((host, port, magic))
10564
10565       assert len(disk_info) == len(self.op.target_node)
10566       self.dest_disk_info = disk_info
10567
10568     else:
10569       raise errors.ProgrammerError("Unhandled export mode %r" %
10570                                    self.op.mode)
10571
10572     # instance disk type verification
10573     # TODO: Implement export support for file-based disks
10574     for disk in self.instance.disks:
10575       if disk.dev_type == constants.LD_FILE:
10576         raise errors.OpPrereqError("Export not supported for instances with"
10577                                    " file-based disks", errors.ECODE_INVAL)
10578
10579   def _CleanupExports(self, feedback_fn):
10580     """Removes exports of current instance from all other nodes.
10581
10582     If an instance in a cluster with nodes A..D was exported to node C, its
10583     exports will be removed from the nodes A, B and D.
10584
10585     """
10586     assert self.op.mode != constants.EXPORT_MODE_REMOTE
10587
10588     nodelist = self.cfg.GetNodeList()
10589     nodelist.remove(self.dst_node.name)
10590
10591     # on one-node clusters nodelist will be empty after the removal
10592     # if we proceed the backup would be removed because OpBackupQuery
10593     # substitutes an empty list with the full cluster node list.
10594     iname = self.instance.name
10595     if nodelist:
10596       feedback_fn("Removing old exports for instance %s" % iname)
10597       exportlist = self.rpc.call_export_list(nodelist)
10598       for node in exportlist:
10599         if exportlist[node].fail_msg:
10600           continue
10601         if iname in exportlist[node].payload:
10602           msg = self.rpc.call_export_remove(node, iname).fail_msg
10603           if msg:
10604             self.LogWarning("Could not remove older export for instance %s"
10605                             " on node %s: %s", iname, node, msg)
10606
10607   def Exec(self, feedback_fn):
10608     """Export an instance to an image in the cluster.
10609
10610     """
10611     assert self.op.mode in constants.EXPORT_MODES
10612
10613     instance = self.instance
10614     src_node = instance.primary_node
10615
10616     if self.op.shutdown:
10617       # shutdown the instance, but not the disks
10618       feedback_fn("Shutting down instance %s" % instance.name)
10619       result = self.rpc.call_instance_shutdown(src_node, instance,
10620                                                self.op.shutdown_timeout)
10621       # TODO: Maybe ignore failures if ignore_remove_failures is set
10622       result.Raise("Could not shutdown instance %s on"
10623                    " node %s" % (instance.name, src_node))
10624
10625     # set the disks ID correctly since call_instance_start needs the
10626     # correct drbd minor to create the symlinks
10627     for disk in instance.disks:
10628       self.cfg.SetDiskID(disk, src_node)
10629
10630     activate_disks = (not instance.admin_up)
10631
10632     if activate_disks:
10633       # Activate the instance disks if we'exporting a stopped instance
10634       feedback_fn("Activating disks for %s" % instance.name)
10635       _StartInstanceDisks(self, instance, None)
10636
10637     try:
10638       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
10639                                                      instance)
10640
10641       helper.CreateSnapshots()
10642       try:
10643         if (self.op.shutdown and instance.admin_up and
10644             not self.op.remove_instance):
10645           assert not activate_disks
10646           feedback_fn("Starting instance %s" % instance.name)
10647           result = self.rpc.call_instance_start(src_node, instance, None, None)
10648           msg = result.fail_msg
10649           if msg:
10650             feedback_fn("Failed to start instance: %s" % msg)
10651             _ShutdownInstanceDisks(self, instance)
10652             raise errors.OpExecError("Could not start instance: %s" % msg)
10653
10654         if self.op.mode == constants.EXPORT_MODE_LOCAL:
10655           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
10656         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
10657           connect_timeout = constants.RIE_CONNECT_TIMEOUT
10658           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
10659
10660           (key_name, _, _) = self.x509_key_name
10661
10662           dest_ca_pem = \
10663             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
10664                                             self.dest_x509_ca)
10665
10666           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
10667                                                      key_name, dest_ca_pem,
10668                                                      timeouts)
10669       finally:
10670         helper.Cleanup()
10671
10672       # Check for backwards compatibility
10673       assert len(dresults) == len(instance.disks)
10674       assert compat.all(isinstance(i, bool) for i in dresults), \
10675              "Not all results are boolean: %r" % dresults
10676
10677     finally:
10678       if activate_disks:
10679         feedback_fn("Deactivating disks for %s" % instance.name)
10680         _ShutdownInstanceDisks(self, instance)
10681
10682     if not (compat.all(dresults) and fin_resu):
10683       failures = []
10684       if not fin_resu:
10685         failures.append("export finalization")
10686       if not compat.all(dresults):
10687         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
10688                                if not dsk)
10689         failures.append("disk export: disk(s) %s" % fdsk)
10690
10691       raise errors.OpExecError("Export failed, errors in %s" %
10692                                utils.CommaJoin(failures))
10693
10694     # At this point, the export was successful, we can cleanup/finish
10695
10696     # Remove instance if requested
10697     if self.op.remove_instance:
10698       feedback_fn("Removing instance %s" % instance.name)
10699       _RemoveInstance(self, feedback_fn, instance,
10700                       self.op.ignore_remove_failures)
10701
10702     if self.op.mode == constants.EXPORT_MODE_LOCAL:
10703       self._CleanupExports(feedback_fn)
10704
10705     return fin_resu, dresults
10706
10707
class LUBackupRemove(NoHooksLU):
  """Deletes the exports of a given instance from the cluster nodes.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Declares locks on all nodes.

    """
    # The instance itself is left unlocked on purpose: nothing will happen
    # to it, and exports of an already removed instance can be removed too.
    # All nodes have to be locked for RemoveExport to work, though.
    self.needed_locks = {
      locking.LEVEL_NODE: locking.ALL_SET,
      }

  def Exec(self, feedback_fn):
    """Remove any export.

    Every locked node is asked for its exports, and the export of the
    instance is removed wherever it is found. Nodes which cannot be
    queried cause a warning; failed removals are only logged.

    @param feedback_fn: function used to tell the caller that no export was
        found for an instance name which could not be expanded

    """
    requested_name = self.op.instance_name
    export_name = self.cfg.ExpandInstanceName(requested_name)
    # An instance which is not found is looked up under the name that was
    # passed in; this only works if it was an FQDN, though.
    name_unresolved = not export_name
    if name_unresolved:
      export_name = requested_name

    node_names = self.acquired_locks[locking.LEVEL_NODE]
    node_results = self.rpc.call_export_list(node_names)

    export_seen = False
    for node_name in node_results:
      node_result = node_results[node_name]
      query_err = node_result.fail_msg
      if query_err:
        self.LogWarning("Failed to query node %s (continuing): %s",
                        node_name, query_err)
        continue
      if export_name not in node_result.payload:
        continue

      export_seen = True
      remove_result = self.rpc.call_export_remove(node_name, export_name)
      remove_err = remove_result.fail_msg
      if remove_err:
        logging.error("Could not remove export for instance %s"
                      " on node %s: %s", export_name, node_name, remove_err)

    if name_unresolved and not export_seen:
      feedback_fn("Export not found. If trying to remove an export belonging"
                  " to a deleted instance please use its Fully Qualified"
                  " Domain Name.")
10753
10754
class LUGroupAdd(LogicalUnit):
  """Logical unit which creates a new node group.

  """
  HPATH = "group-add"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Generates the UUID of the new group and declares its lock.

    """
    # The UUID has to be known at this point already, as the lock for the
    # new group is created and acquired under that name. Exec() later tells
    # cfg.AddNodeGroup not to check whether the UUID exists in the
    # configuration.
    new_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
    self.group_uuid = new_uuid
    self.needed_locks = {}
    self.add_locks[locking.LEVEL_NODEGROUP] = new_uuid

  def CheckPrereq(self):
    """Check prerequisites.

    The requested name must not belong to an existing node group, and the
    node parameters, if given, are forced to their declared types.

    @raise errors.OpPrereqError: if a group with that name already exists

    """
    group_name = self.op.group_name

    # the lookup raises OpPrereqError for unknown names, which is the
    # outcome wanted here
    try:
      existing_uuid = self.cfg.LookupNodeGroup(group_name)
      name_taken = True
    except errors.OpPrereqError:
      name_taken = False

    if name_taken:
      raise errors.OpPrereqError("Desired group name '%s' already exists as a"
                                 " node group (UUID: %s)" %
                                 (group_name, existing_uuid),
                                 errors.ECODE_EXISTS)

    ndparams = self.op.ndparams
    if ndparams:
      utils.ForceDictType(ndparams, constants.NDS_PARAMETER_TYPES)

  def BuildHooksEnv(self):
    """Build hooks env.

    @rtype: dict
    @return: a dictionary holding the name of the new group

    """
    env = {
      "GROUP_NAME": self.op.group_name,
      }
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    @rtype: tuple
    @return: two lists, each holding only the master node

    """
    master = self.cfg.GetMasterNode()
    return ([master], [master])

  def Exec(self, feedback_fn):
    """Add the node group to the cluster.

    The group starts without members, under the UUID generated in
    L{ExpandNames}.

    @param feedback_fn: unused

    """
    new_group = objects.NodeGroup(name=self.op.group_name, members=[],
                                  uuid=self.group_uuid,
                                  alloc_policy=self.op.alloc_policy,
                                  ndparams=self.op.ndparams)

    # the UUID was generated by ExpandNames, hence no existence check
    self.cfg.AddNodeGroup(new_group, self.proc.GetECId(), check_uuid=False)
    # the group was added, so drop the node group entry from the locks
    # scheduled for removal
    del self.remove_locks[locking.LEVEL_NODEGROUP]
10817
10818
class LUGroupAssignNodes(NoHooksLU):
  """Logical unit which moves nodes into a node group.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Resolves group and node names and declares the locks.

    Locks are declared for the given nodes, for the groups these nodes
    currently belong to and for the destination group.

    """
    # Both calls raise errors.OpPrereqError on their own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
    self.op.nodes = _GetWantedNodes(self, self.op.nodes)

    # The node list and the *destination* group are at hand; the "source"
    # groups have to be taken from the node information.
    self.node_data = self.cfg.GetAllNodesInfo()
    group_locks = set()
    for node_name in self.op.nodes:
      group_locks.add(self.node_data[node_name].group)
    group_locks.add(self.group_uuid)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: list(group_locks),
      locking.LEVEL_NODE: self.op.nodes,
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Instances which would get split across groups by the assignment make
    the operation fail, unless C{self.op.force} is set; in that case
    warnings are issued instead.

    @raise errors.OpExecError: if the destination group cannot be
        retrieved, or if instances get split and force was not given

    """
    self.group = self.cfg.GetNodeGroup(self.group_uuid)
    instance_data = self.cfg.GetAllInstancesInfo()

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    assignments = [(node_name, self.group_uuid)
                   for node_name in self.op.nodes]
    (new_splits, previous_splits) = \
      self.CheckAssignmentForSplitInstances(assignments, self.node_data,
                                            instance_data)

    if not new_splits:
      return

    fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))

    if not self.op.force:
      raise errors.OpExecError("The following instances get split by this"
                               " change and --force was not given: %s" %
                               fmt_new_splits)

    self.LogWarning("This operation will split the following instances: %s",
                    fmt_new_splits)

    # instances which stay split are only mentioned along with new splits
    if previous_splits:
      self.LogWarning("In addition, these already-split instances continue"
                      " to be split across groups: %s",
                      utils.CommaJoin(utils.NiceSort(previous_splits)))

  def Exec(self, feedback_fn):
    """Assign nodes to a new group.

    @param feedback_fn: passed on to the configuration update

    """
    target_uuid = self.group_uuid
    for node_name in self.op.nodes:
      self.node_data[node_name].group = target_uuid

    # According to the original note, this saves all modified nodes.
    self.cfg.Update(self.group, feedback_fn)

  @staticmethod
  def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
    """Check for split instances after a node assignment.

    The given assignments are treated as one atomic operation. An instance
    counts as split when its primary and secondary nodes do not all belong
    to the same group. Only instances whose disk template is listed in
    constants.DTS_INT_MIRROR are looked at.

    @type changes: list of (node_name, new_group_uuid) pairs.
    @param changes: list of node assignments to consider.
    @param node_data: a dict with data for all nodes
    @param instance_data: a dict with all instances to consider
    @rtype: a two-tuple
    @return: a list of the names of instances which are not split at the
      moment but would be after the change, and a list of the names of
      instances which are split already and remain so after the change.

    """
    # only assignments which really move a node matter
    changed_nodes = {}
    for (node, group) in changes:
      if node_data[node].group != group:
        changed_nodes[node] = group

    all_split_instances = set()
    previously_split_instances = set()

    for inst in instance_data.values():
      if inst.disk_template not in constants.DTS_INT_MIRROR:
        continue

      instance_nodes = [inst.primary_node] + list(inst.secondary_nodes)

      # groups spanned with the current assignment
      groups_before = set(node_data[node].group for node in instance_nodes)
      if len(groups_before) > 1:
        previously_split_instances.add(inst.name)

      # groups spanned once the changes have been applied
      groups_after = set(changed_nodes.get(node, node_data[node].group)
                         for node in instance_nodes)
      if len(groups_after) > 1:
        all_split_instances.add(inst.name)

    return (list(all_split_instances - previously_split_instances),
            list(previously_split_instances & all_split_instances))
10931
10932
class _GroupQuery(_QueryBase):
  """Query implementation for node groups.

  """
  FIELDS = query.GROUP_FIELDS

  def ExpandNames(self, lu):
    """Resolve the requested groups to UUIDs; no locks are requested.

    The UUIDs of the groups to report are stored in C{self.wanted}. Without
    requested names all groups are selected, ordered by their nicely-sorted
    names; otherwise every requested entry may be a group UUID or a group
    name and the order of the request is kept.

    @param lu: the logical unit on whose behalf the query runs
    @raise errors.OpPrereqError: if a requested entry is neither a known
        UUID nor a known name

    """
    lu.needed_locks = {}

    self._all_groups = lu.cfg.GetAllNodeGroupsInfo()

    uuid_by_name = {}
    for grp in self._all_groups.values():
      uuid_by_name[grp.name] = grp.uuid

    if self.names:
      # Accept names to be either names or UUIDs.
      unknown = []
      self.wanted = []
      known_uuids = frozenset(self._all_groups.keys())

      for entry in self.names:
        if entry in known_uuids:
          self.wanted.append(entry)
        elif entry in uuid_by_name:
          self.wanted.append(uuid_by_name[entry])
        else:
          unknown.append(entry)

      if unknown:
        raise errors.OpPrereqError("Some groups do not exist: %s" %
                                   utils.CommaJoin(unknown),
                                   errors.ECODE_NOENT)
    else:
      sorted_names = utils.NiceSort(uuid_by_name.keys())
      self.wanted = [uuid_by_name[name] for name in sorted_names]

  def DeclareLocks(self, lu, level):
    """Nothing to declare, this query does not use locks.

    """
    pass

  def _GetQueryData(self, lu):
    """Computes the list of node groups and their attributes.

    Node and instance lists per group are only computed when the matching
    data kind (query.GQ_NODE, query.GQ_INST) was requested.

    """
    want_nodes = query.GQ_NODE in self.requested_data
    want_instances = query.GQ_INST in self.requested_data

    nodes_by_group = None
    instances_by_group = None

    # Instances are attached to a group through their primary node, so the
    # node information has to be walked even if only instance data was
    # requested.
    if want_nodes or want_instances:
      node_cfg = lu.cfg.GetAllNodesInfo()
      nodes_by_group = dict((uuid, []) for uuid in self.wanted)
      group_of_node = {}

      for node in node_cfg.values():
        if node.group in nodes_by_group:
          nodes_by_group[node.group].append(node.name)
          group_of_node[node.name] = node.group

    if want_instances:
      instance_cfg = lu.cfg.GetAllInstancesInfo()
      instances_by_group = dict((uuid, []) for uuid in self.wanted)

      for instance in instance_cfg.values():
        pnode = instance.primary_node
        if pnode in group_of_node:
          instances_by_group[group_of_node[pnode]].append(instance.name)

    if not want_nodes:
      # Do not pass on node information if it was not requested.
      nodes_by_group = None

    groups = [self._all_groups[uuid] for uuid in self.wanted]

    return query.GroupQueryData(groups, nodes_by_group, instances_by_group)
11008
11009
class LUGroupQuery(NoHooksLU):
  """Logical unit for querying node groups.

  All the work is delegated to a L{_GroupQuery} helper object.

  """
  REQ_BGL = False

  def CheckArguments(self):
    """Create the query helper from the opcode parameters.

    """
    name_filter = qlang.MakeSimpleFilter("name", self.op.names)
    self.gq = _GroupQuery(name_filter, self.op.output_fields, False)

  def ExpandNames(self):
    """Let the query helper expand the names on behalf of this LU.

    """
    self.gq.ExpandNames(self)

  def Exec(self, feedback_fn):
    """Return the result of the helper's old-style query.

    """
    answer = self.gq.OldStyleQuery(self)
    return answer
11025
11026
class LUGroupSetParams(LogicalUnit):
  """Modifies the parameters of a node group.

  """
  HPATH = "group-modify"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def CheckArguments(self):
    """Check that at least one modification was requested.

    @raise errors.OpPrereqError: if neither node parameters nor an
        allocation policy were passed

    """
    all_changes = [
      self.op.ndparams,
      self.op.alloc_policy,
      ]

    if all_changes.count(None) == len(all_changes):
      raise errors.OpPrereqError("Please pass at least one modification",
                                 errors.ECODE_INVAL)

  def ExpandNames(self):
    """Resolve the group name to its UUID and lock the group.

    """
    # This raises errors.OpPrereqError on its own:
    self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [self.group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Loads the group object and, if node parameters were passed, computes
    and type-checks the parameters the group will have after the change.

    @raise errors.OpExecError: if the group can't be retrieved

    """
    self.group = self.cfg.GetNodeGroup(self.group_uuid)

    if self.group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    if self.op.ndparams:
      new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
      # Type-check the merged dict, as this is the one Exec stores in the
      # group; checking only the opcode's dict after the merge would leave
      # the stored values unchecked
      utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
      self.new_ndparams = new_ndparams

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    return {
      "GROUP_NAME": self.op.group_name,
      "NEW_ALLOC_POLICY": self.op.alloc_policy,
      }

  def BuildHooksNodes(self):
    """Build hooks nodes.

    Hooks run on the master node only, both before and after.

    """
    mn = self.cfg.GetMasterNode()
    return ([mn], [mn])

  def Exec(self, feedback_fn):
    """Modifies the node group.

    @return: list of (parameter name, new value) pairs; only the node
        parameters are reported

    """
    result = []

    if self.op.ndparams:
      self.group.ndparams = self.new_ndparams
      result.append(("ndparams", str(self.group.ndparams)))

    if self.op.alloc_policy:
      self.group.alloc_policy = self.op.alloc_policy

    self.cfg.Update(self.group, feedback_fn)
    return result
11099
11100
11101
class LUGroupRemove(LogicalUnit):
  """Logical unit for removing an (empty) node group.

  """
  HPATH = "group-remove"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Resolve the group name to its UUID and lock the group.

    """
    # This raises errors.OpPrereqError on its own:
    group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.group_uuid = group_uuid
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    The group must not contain any node and it must not be the only node
    group of the cluster.

    @raise errors.OpPrereqError: if one of the two conditions is not met

    """
    # Verify that the group is empty.
    members = []
    for node in self.cfg.GetAllNodesInfo().values():
      if node.group == self.group_uuid:
        members.append(node.name)

    if members:
      member_list = utils.CommaJoin(utils.NiceSort(members))
      raise errors.OpPrereqError("Group '%s' not empty, has the following"
                                 " nodes: %s" %
                                 (self.op.group_name, member_list),
                                 errors.ECODE_STATE)

    # Verify the cluster would not be left group-less.
    group_count = len(self.cfg.GetNodeGroupList())
    if group_count == 1:
      raise errors.OpPrereqError("Group '%s' is the only group,"
                                 " cannot be removed" %
                                 self.op.group_name,
                                 errors.ECODE_STATE)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {}
    env["GROUP_NAME"] = self.op.group_name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    Hooks run on the master node only, both before and after.

    """
    master = self.cfg.GetMasterNode()
    return ([master], [master])

  def Exec(self, feedback_fn):
    """Remove the node group.

    @raise errors.OpExecError: if the configuration reports an error while
        removing the group

    """
    group_uuid = self.group_uuid

    try:
      self.cfg.RemoveNodeGroup(group_uuid)
    except errors.ConfigurationError:
      raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
                               (self.op.group_name, group_uuid))

    self.remove_locks[locking.LEVEL_NODEGROUP] = group_uuid
11167
11168
class LUGroupRename(LogicalUnit):
  """Logical unit for renaming a node group.

  """
  HPATH = "group-rename"
  HTYPE = constants.HTYPE_GROUP
  REQ_BGL = False

  def ExpandNames(self):
    """Resolve the group name to its UUID and lock the group.

    """
    # This raises errors.OpPrereqError on its own:
    group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)

    self.group_uuid = group_uuid
    self.needed_locks = {
      locking.LEVEL_NODEGROUP: [group_uuid],
      }

  def CheckPrereq(self):
    """Check prerequisites.

    Ensures requested new name is not yet used.

    @raise errors.OpPrereqError: if a group with the new name exists

    """
    try:
      clash_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
    except errors.OpPrereqError:
      # The lookup failed, hence the new name is still free
      return

    raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
                               " node group (UUID: %s)" %
                               (self.op.new_name, clash_uuid),
                               errors.ECODE_EXISTS)

  def BuildHooksEnv(self):
    """Build hooks env.

    """
    env = {}
    env["OLD_NAME"] = self.op.group_name
    env["NEW_NAME"] = self.op.new_name
    return env

  def BuildHooksNodes(self):
    """Build hooks nodes.

    Hooks run on the master node and on all nodes belonging to the renamed
    group, both before and after.

    """
    master = self.cfg.GetMasterNode()

    # The master always comes first; drop it here so it isn't listed twice
    others = self.cfg.GetAllNodesInfo()
    others.pop(master, None)

    hook_nodes = [master] + [node.name for node in others.values()
                             if node.group == self.group_uuid]

    return (hook_nodes, hook_nodes)

  def Exec(self, feedback_fn):
    """Rename the node group.

    @return: the new name of the group
    @raise errors.OpExecError: if the group can't be retrieved

    """
    new_name = self.op.new_name
    group = self.cfg.GetNodeGroup(self.group_uuid)

    if group is None:
      raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
                               (self.op.group_name, self.group_uuid))

    group.name = new_name
    self.cfg.Update(group, feedback_fn)

    return new_name
11236
11237
class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
  """Generic tags LU.

  This is an abstract class which is the parent of all the other tags LUs.

  """
  def ExpandNames(self):
    """Expand the name of the tagged object and declare its lock.

    Node and instance names are expanded in place in the opcode; a node
    group name is looked up and its UUID kept in C{self.group_uuid}. In
    these three cases the object is added to C{self.needed_locks}. No lock
    is declared for cluster tags (see the FIXME below).

    """
    self.group_uuid = None
    self.needed_locks = {}
    if self.op.kind == constants.TAG_NODE:
      self.op.name = _ExpandNodeName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_NODE] = self.op.name
    elif self.op.kind == constants.TAG_INSTANCE:
      self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
      self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
      # Lock the group, just like nodes and instances are locked above, so
      # that tag changes can't interleave with other LUs working on it
      self.needed_locks[locking.LEVEL_NODEGROUP] = [self.group_uuid]

    # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
    # not possible to acquire the BGL based on opcode parameters)

  def CheckPrereq(self):
    """Check prerequisites.

    Loads the object the tags belong to into C{self.target}.

    @raise errors.OpPrereqError: if the tag kind is unknown or the object
        can't be found in the configuration

    """
    if self.op.kind == constants.TAG_CLUSTER:
      self.target = self.cfg.GetClusterInfo()
    elif self.op.kind == constants.TAG_NODE:
      self.target = self.cfg.GetNodeInfo(self.op.name)
    elif self.op.kind == constants.TAG_INSTANCE:
      self.target = self.cfg.GetInstanceInfo(self.op.name)
    elif self.op.kind == constants.TAG_NODEGROUP:
      self.target = self.cfg.GetNodeGroup(self.group_uuid)
    else:
      raise errors.OpPrereqError("Wrong tag type requested (%s)" %
                                 str(self.op.kind), errors.ECODE_INVAL)

    # The name was resolved before the lock was acquired, so the object may
    # be gone by now; fail cleanly instead of crashing on None later
    if self.target is None:
      raise errors.OpPrereqError("Could not retrieve %s '%s'" %
                                 (str(self.op.kind), self.op.name),
                                 errors.ECODE_NOENT)
11274
11275
class LUTagsGet(TagsLU):
  """Returns the tags of a given object.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names as the parent class does, using shared locks.

    """
    TagsLU.ExpandNames(self)

    # Share locks as this is only a read operation
    self.share_locks = dict((level, 1) for level in locking.LEVELS)

  def Exec(self, feedback_fn):
    """Returns the tag list.

    """
    tags = self.target.GetTags()
    return list(tags)
11293
11294
11295 class LUTagsSearch(NoHooksLU):
11296   """Searches the tags for a given pattern.
11297
11298   """
11299   REQ_BGL = False
11300
11301   def ExpandNames(self):
11302     self.needed_locks = {}
11303
11304   def CheckPrereq(self):
11305     """Check prerequisites.
11306
11307     This checks the pattern passed for validity by compiling it.
11308
11309     """
11310     try:
11311       self.re = re.compile(self.op.pattern)
11312     except re.error, err:
11313       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
11314                                  (self.op.pattern, err), errors.ECODE_INVAL)
11315
11316   def Exec(self, feedback_fn):
11317     """Returns the tag list.
11318
11319     """
11320     cfg = self.cfg
11321     tgts = [("/cluster", cfg.GetClusterInfo())]
11322     ilist = cfg.GetAllInstancesInfo().values()
11323     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
11324     nlist = cfg.GetAllNodesInfo().values()
11325     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
11326     tgts.extend(("/nodegroup/%s" % n.name, n)
11327                 for n in cfg.GetAllNodeGroupsInfo().values())
11328     results = []
11329     for path, target in tgts:
11330       for tag in target.GetTags():
11331         if self.re.search(tag):
11332           results.append((path, tag))
11333     return results
11334
11335
11336 class LUTagsSet(TagsLU):
11337   """Sets a tag on a given object.
11338
11339   """
11340   REQ_BGL = False
11341
11342   def CheckPrereq(self):
11343     """Check prerequisites.
11344
11345     This checks the type and length of the tag name and value.
11346
11347     """
11348     TagsLU.CheckPrereq(self)
11349     for tag in self.op.tags:
11350       objects.TaggableObject.ValidateTag(tag)
11351
11352   def Exec(self, feedback_fn):
11353     """Sets the tag.
11354
11355     """
11356     try:
11357       for tag in self.op.tags:
11358         self.target.AddTag(tag)
11359     except errors.TagError, err:
11360       raise errors.OpExecError("Error while setting tag: %s" % str(err))
11361     self.cfg.Update(self.target, feedback_fn)
11362
11363
class LUTagsDel(TagsLU):
  """Delete a list of tags from a given object.

  """
  REQ_BGL = False

  def CheckPrereq(self):
    """Check prerequisites.

    Every tag is validated and has to be present on the target object.

    @raise errors.OpPrereqError: if any of the tags is not set on the
        target

    """
    TagsLU.CheckPrereq(self)

    validate_fn = objects.TaggableObject.ValidateTag
    for tag in self.op.tags:
      validate_fn(tag)

    missing = frozenset(self.op.tags) - self.target.GetTags()
    if not missing:
      return

    quoted = ["'%s'" % name for name in sorted(missing)]
    raise errors.OpPrereqError("Tag(s) %s not found" %
                               (utils.CommaJoin(quoted), ),
                               errors.ECODE_NOENT)

  def Exec(self, feedback_fn):
    """Remove the tag from the object.

    All tags are removed from the target, which is then saved to the
    configuration.

    """
    target = self.target

    for tag in self.op.tags:
      target.RemoveTag(tag)

    self.cfg.Update(target, feedback_fn)
11396
11397
class LUTestDelay(NoHooksLU):
  """Sleep for a specified amount of time.

  This LU sleeps on the master and/or nodes for a specified amount of
  time.

  """
  REQ_BGL = False

  def ExpandNames(self):
    """Expand names and set required locks.

    If nodes were given, their names are expanded and the nodes locked;
    otherwise no locks are needed.

    """
    self.needed_locks = {}

    if not self.op.on_nodes:
      return

    # _GetWantedNodes can be used here, but is not always appropriate to use
    # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
    # more information.
    self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
    self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes

  def _TestDelay(self):
    """Do the actual sleep.

    @raise errors.OpExecError: if the delay on the master fails

    """
    duration = self.op.duration

    if self.op.on_master and not utils.TestDelay(duration):
      raise errors.OpExecError("Error during master delay test")

    if self.op.on_nodes:
      rpc_results = self.rpc.call_test_delay(self.op.on_nodes, duration)
      for (node, node_result) in rpc_results.items():
        node_result.Raise("Failure during rpc call to node %s" % node)

  def Exec(self, feedback_fn):
    """Execute the test delay opcode, with the wanted repetitions.

    A repeat count of zero means a single delay without any logging;
    otherwise each iteration is logged before it is run.

    """
    repeat = self.op.repeat

    if repeat == 0:
      self._TestDelay()
      return

    last_iteration = repeat - 1
    for iteration in range(repeat):
      self.LogInfo("Test delay iteration %d/%d" % (iteration, last_iteration))
      self._TestDelay()
11444
11445
class LUTestJqueue(NoHooksLU):
  """Utility LU to test some aspects of the job queue.

  Depending on the opcode parameters, the LU notifies a client at certain
  points of its execution (through a temporary Unix socket), sends test
  log messages and/or fails on request.

  """
  REQ_BGL = False

  # Must be lower than default timeout for WaitForJobChange to see whether it
  # notices changed jobs
  _CLIENT_CONNECT_TIMEOUT = 20.0
  # Timeout for the client's confirmation once it has connected
  _CLIENT_CONFIRM_TIMEOUT = 60.0

  @classmethod
  def _NotifyUsingSocket(cls, cb, errcls):
    """Opens a Unix socket and waits for another program to connect.

    The socket is created in a fresh temporary directory and its path is
    passed to C{cb}. The method then waits for a client to connect, removes
    the temporary directory and finally waits on the accepted connection
    (see the comments below); both waits are subject to a timeout.

    @type cb: callable
    @param cb: Callback to send socket name to client
    @type errcls: class
    @param errcls: Exception class to use for errors
    @raise errcls: if a socket error (e.g. a timeout) occurs while waiting
        for the connection or for the confirmation

    """
    # Using a temporary directory as there's no easy way to create temporary
    # sockets without writing a custom loop around tempfile.mktemp and
    # socket.bind
    tmpdir = tempfile.mkdtemp()
    try:
      tmpsock = utils.PathJoin(tmpdir, "sock")

      logging.debug("Creating temporary socket at %s", tmpsock)
      sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
      try:
        sock.bind(tmpsock)
        sock.listen(1)

        # Send details to client
        cb(tmpsock)

        # Wait for client to connect before continuing
        sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
        try:
          (conn, _) = sock.accept()
        except socket.error, err:
          raise errcls("Client didn't connect in time (%s)" % err)
      finally:
        # The listening socket is closed whether or not a client connected
        sock.close()
    finally:
      # Remove as soon as client is connected
      shutil.rmtree(tmpdir)

    # Wait for client to close
    try:
      try:
        # pylint: disable-msg=E1101
        # Instance of '_socketobject' has no ... member
        conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
        conn.recv(1)
      except socket.error, err:
        raise errcls("Client failed to confirm notification (%s)" % err)
    finally:
      conn.close()

  def _SendNotification(self, test, arg, sockname):
    """Sends a notification to the client.

    The notification is sent as a log entry of type
    constants.ELOG_JQUEUE_TEST.

    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)
    @type sockname: string
    @param sockname: Socket path

    """
    self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))

  def _Notify(self, prereq, test, arg):
    """Notifies the client of a test.

    Failures are reported as errors.OpPrereqError for prereq-phase tests
    and as errors.OpExecError otherwise.

    @type prereq: bool
    @param prereq: Whether this is a prereq-phase test
    @type test: string
    @param test: Test name
    @param arg: Test argument (depends on test)

    """
    if prereq:
      errcls = errors.OpPrereqError
    else:
      errcls = errors.OpExecError

    # The socket path is only known inside _NotifyUsingSocket, hence the
    # partial with the remaining arguments already filled in
    return self._NotifyUsingSocket(compat.partial(self._SendNotification,
                                                  test, arg),
                                   errcls)

  def CheckArguments(self):
    """Count the calls to this method and reset the ExpandNames counter.

    """
    self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
    self.expandnames_calls = 0

  def ExpandNames(self):
    """Verify the call order, notify the client if requested, set locks.

    @raise errors.ProgrammerError: if CheckArguments was not called before

    """
    checkargs_calls = getattr(self, "checkargs_calls", 0)
    if checkargs_calls < 1:
      raise errors.ProgrammerError("CheckArguments was not called")

    self.expandnames_calls += 1

    if self.op.notify_waitlock:
      self._Notify(True, constants.JQT_EXPANDNAMES, None)

    self.LogInfo("Expanding names")

    # Get lock on master node (just to get a lock, not for a particular reason)
    self.needed_locks = {
      locking.LEVEL_NODE: self.cfg.GetMasterNode(),
      }

  def Exec(self, feedback_fn):
    """Run the requested notifications, log messages and failure.

    @return: True, unless a failure was requested
    @raise errors.ProgrammerError: if ExpandNames was not called before
    @raise errors.OpExecError: if the opcode asked for a failure

    """
    if self.expandnames_calls < 1:
      raise errors.ProgrammerError("ExpandNames was not called")

    if self.op.notify_exec:
      self._Notify(False, constants.JQT_EXEC, None)

    self.LogInfo("Executing")

    if self.op.log_messages:
      # Announce the number of messages first, then send them one by one
      self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
      for idx, msg in enumerate(self.op.log_messages):
        self.LogInfo("Sending log message %s", idx + 1)
        feedback_fn(constants.JQT_MSGPREFIX + msg)
        # Report how many test messages have been sent
        self._Notify(False, constants.JQT_LOGMSG, idx + 1)

    if self.op.fail:
      raise errors.OpExecError("Opcode failure was requested")

    return True
11580
11581
11582 class IAllocator(object):
11583   """IAllocator framework.
11584
11585   An IAllocator instance has three sets of attributes:
11586     - cfg that is needed to query the cluster
11587     - input data (all members of the _KEYS class attribute are required)
11588     - four buffer attributes (in|out_data|text), that represent the
11589       input (to the external script) in text and data structure format,
11590       and the output from it, again in two formats
11591     - the result variables from the script (success, info, nodes) for
11592       easy usage
11593
11594   """
11595   # pylint: disable-msg=R0902
11596   # lots of instance attributes
11597   _ALLO_KEYS = [
11598     "name", "mem_size", "disks", "disk_template",
11599     "os", "tags", "nics", "vcpus", "hypervisor",
11600     ]
11601   _RELO_KEYS = [
11602     "name", "relocate_from",
11603     ]
11604   _EVAC_KEYS = [
11605     "evac_nodes",
11606     ]
11607
11608   def __init__(self, cfg, rpc, mode, **kwargs):
11609     self.cfg = cfg
11610     self.rpc = rpc
11611     # init buffer variables
11612     self.in_text = self.out_text = self.in_data = self.out_data = None
11613     # init all input fields so that pylint is happy
11614     self.mode = mode
11615     self.mem_size = self.disks = self.disk_template = None
11616     self.os = self.tags = self.nics = self.vcpus = None
11617     self.hypervisor = None
11618     self.relocate_from = None
11619     self.name = None
11620     self.evac_nodes = None
11621     # computed fields
11622     self.required_nodes = None
11623     # init result fields
11624     self.success = self.info = self.result = None
11625     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
11626       keyset = self._ALLO_KEYS
11627       fn = self._AddNewInstance
11628     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
11629       keyset = self._RELO_KEYS
11630       fn = self._AddRelocateInstance
11631     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
11632       keyset = self._EVAC_KEYS
11633       fn = self._AddEvacuateNodes
11634     else:
11635       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
11636                                    " IAllocator" % self.mode)
11637     for key in kwargs:
11638       if key not in keyset:
11639         raise errors.ProgrammerError("Invalid input parameter '%s' to"
11640                                      " IAllocator" % key)
11641       setattr(self, key, kwargs[key])
11642
11643     for key in keyset:
11644       if key not in kwargs:
11645         raise errors.ProgrammerError("Missing input parameter '%s' to"
11646                                      " IAllocator" % key)
11647     self._BuildInputData(fn)
11648
11649   def _ComputeClusterData(self):
11650     """Compute the generic allocator input data.
11651
11652     This is the data that is independent of the actual operation.
11653
11654     """
11655     cfg = self.cfg
11656     cluster_info = cfg.GetClusterInfo()
11657     # cluster data
11658     data = {
11659       "version": constants.IALLOCATOR_VERSION,
11660       "cluster_name": cfg.GetClusterName(),
11661       "cluster_tags": list(cluster_info.GetTags()),
11662       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
11663       # we don't have job IDs
11664       }
11665     ninfo = cfg.GetAllNodesInfo()
11666     iinfo = cfg.GetAllInstancesInfo().values()
11667     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
11668
11669     # node data
11670     node_list = [n.name for n in ninfo.values() if n.vm_capable]
11671
11672     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
11673       hypervisor_name = self.hypervisor
11674     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
11675       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
11676     elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
11677       hypervisor_name = cluster_info.enabled_hypervisors[0]
11678
11679     node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
11680                                         hypervisor_name)
11681     node_iinfo = \
11682       self.rpc.call_all_instances_info(node_list,
11683                                        cluster_info.enabled_hypervisors)
11684
11685     data["nodegroups"] = self._ComputeNodeGroupData(cfg)
11686
11687     config_ndata = self._ComputeBasicNodeData(ninfo)
11688     data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
11689                                                  i_list, config_ndata)
11690     assert len(data["nodes"]) == len(ninfo), \
11691         "Incomplete node data computed"
11692
11693     data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
11694
11695     self.in_data = data
11696
11697   @staticmethod
11698   def _ComputeNodeGroupData(cfg):
11699     """Compute node groups data.
11700
11701     """
11702     ng = {}
11703     for guuid, gdata in cfg.GetAllNodeGroupsInfo().items():
11704       ng[guuid] = {
11705         "name": gdata.name,
11706         "alloc_policy": gdata.alloc_policy,
11707         }
11708     return ng
11709
11710   @staticmethod
11711   def _ComputeBasicNodeData(node_cfg):
11712     """Compute global node data.
11713
11714     @rtype: dict
11715     @returns: a dict of name: (node dict, node config)
11716
11717     """
11718     node_results = {}
11719     for ninfo in node_cfg.values():
11720       # fill in static (config-based) values
11721       pnr = {
11722         "tags": list(ninfo.GetTags()),
11723         "primary_ip": ninfo.primary_ip,
11724         "secondary_ip": ninfo.secondary_ip,
11725         "offline": ninfo.offline,
11726         "drained": ninfo.drained,
11727         "master_candidate": ninfo.master_candidate,
11728         "group": ninfo.group,
11729         "master_capable": ninfo.master_capable,
11730         "vm_capable": ninfo.vm_capable,
11731         }
11732
11733       node_results[ninfo.name] = pnr
11734
11735     return node_results
11736
11737   @staticmethod
11738   def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
11739                               node_results):
11740     """Compute global node data.
11741
11742     @param node_results: the basic node structures as filled from the config
11743
11744     """
11745     # make a copy of the current dict
11746     node_results = dict(node_results)
11747     for nname, nresult in node_data.items():
11748       assert nname in node_results, "Missing basic data for node %s" % nname
11749       ninfo = node_cfg[nname]
11750
11751       if not (ninfo.offline or ninfo.drained):
11752         nresult.Raise("Can't get data for node %s" % nname)
11753         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
11754                                 nname)
11755         remote_info = nresult.payload
11756
11757         for attr in ['memory_total', 'memory_free', 'memory_dom0',
11758                      'vg_size', 'vg_free', 'cpu_total']:
11759           if attr not in remote_info:
11760             raise errors.OpExecError("Node '%s' didn't return attribute"
11761                                      " '%s'" % (nname, attr))
11762           if not isinstance(remote_info[attr], int):
11763             raise errors.OpExecError("Node '%s' returned invalid value"
11764                                      " for '%s': %s" %
11765                                      (nname, attr, remote_info[attr]))
11766         # compute memory used by primary instances
11767         i_p_mem = i_p_up_mem = 0
11768         for iinfo, beinfo in i_list:
11769           if iinfo.primary_node == nname:
11770             i_p_mem += beinfo[constants.BE_MEMORY]
11771             if iinfo.name not in node_iinfo[nname].payload:
11772               i_used_mem = 0
11773             else:
11774               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]['memory'])
11775             i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
11776             remote_info['memory_free'] -= max(0, i_mem_diff)
11777
11778             if iinfo.admin_up:
11779               i_p_up_mem += beinfo[constants.BE_MEMORY]
11780
11781         # compute memory used by instances
11782         pnr_dyn = {
11783           "total_memory": remote_info['memory_total'],
11784           "reserved_memory": remote_info['memory_dom0'],
11785           "free_memory": remote_info['memory_free'],
11786           "total_disk": remote_info['vg_size'],
11787           "free_disk": remote_info['vg_free'],
11788           "total_cpus": remote_info['cpu_total'],
11789           "i_pri_memory": i_p_mem,
11790           "i_pri_up_memory": i_p_up_mem,
11791           }
11792         pnr_dyn.update(node_results[nname])
11793         node_results[nname] = pnr_dyn
11794
11795     return node_results
11796
11797   @staticmethod
11798   def _ComputeInstanceData(cluster_info, i_list):
11799     """Compute global instance data.
11800
11801     """
11802     instance_data = {}
11803     for iinfo, beinfo in i_list:
11804       nic_data = []
11805       for nic in iinfo.nics:
11806         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
11807         nic_dict = {"mac": nic.mac,
11808                     "ip": nic.ip,
11809                     "mode": filled_params[constants.NIC_MODE],
11810                     "link": filled_params[constants.NIC_LINK],
11811                    }
11812         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
11813           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
11814         nic_data.append(nic_dict)
11815       pir = {
11816         "tags": list(iinfo.GetTags()),
11817         "admin_up": iinfo.admin_up,
11818         "vcpus": beinfo[constants.BE_VCPUS],
11819         "memory": beinfo[constants.BE_MEMORY],
11820         "os": iinfo.os,
11821         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
11822         "nics": nic_data,
11823         "disks": [{constants.IDISK_SIZE: dsk.size,
11824                    constants.IDISK_MODE: dsk.mode}
11825                   for dsk in iinfo.disks],
11826         "disk_template": iinfo.disk_template,
11827         "hypervisor": iinfo.hypervisor,
11828         }
11829       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
11830                                                  pir["disks"])
11831       instance_data[iinfo.name] = pir
11832
11833     return instance_data
11834
11835   def _AddNewInstance(self):
11836     """Add new instance data to allocator structure.
11837
11838     This in combination with _AllocatorGetClusterData will create the
11839     correct structure needed as input for the allocator.
11840
11841     The checks for the completeness of the opcode must have already been
11842     done.
11843
11844     """
11845     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
11846
11847     if self.disk_template in constants.DTS_INT_MIRROR:
11848       self.required_nodes = 2
11849     else:
11850       self.required_nodes = 1
11851     request = {
11852       "name": self.name,
11853       "disk_template": self.disk_template,
11854       "tags": self.tags,
11855       "os": self.os,
11856       "vcpus": self.vcpus,
11857       "memory": self.mem_size,
11858       "disks": self.disks,
11859       "disk_space_total": disk_space,
11860       "nics": self.nics,
11861       "required_nodes": self.required_nodes,
11862       }
11863     return request
11864
11865   def _AddRelocateInstance(self):
11866     """Add relocate instance data to allocator structure.
11867
11868     This in combination with _IAllocatorGetClusterData will create the
11869     correct structure needed as input for the allocator.
11870
11871     The checks for the completeness of the opcode must have already been
11872     done.
11873
11874     """
11875     instance = self.cfg.GetInstanceInfo(self.name)
11876     if instance is None:
11877       raise errors.ProgrammerError("Unknown instance '%s' passed to"
11878                                    " IAllocator" % self.name)
11879
11880     if instance.disk_template not in constants.DTS_MIRRORED:
11881       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
11882                                  errors.ECODE_INVAL)
11883
11884     if instance.disk_template in constants.DTS_INT_MIRROR and \
11885         len(instance.secondary_nodes) != 1:
11886       raise errors.OpPrereqError("Instance has not exactly one secondary node",
11887                                  errors.ECODE_STATE)
11888
11889     self.required_nodes = 1
11890     disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
11891     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
11892
11893     request = {
11894       "name": self.name,
11895       "disk_space_total": disk_space,
11896       "required_nodes": self.required_nodes,
11897       "relocate_from": self.relocate_from,
11898       }
11899     return request
11900
11901   def _AddEvacuateNodes(self):
11902     """Add evacuate nodes data to allocator structure.
11903
11904     """
11905     request = {
11906       "evac_nodes": self.evac_nodes
11907       }
11908     return request
11909
11910   def _BuildInputData(self, fn):
11911     """Build input data structures.
11912
11913     """
11914     self._ComputeClusterData()
11915
11916     request = fn()
11917     request["type"] = self.mode
11918     self.in_data["request"] = request
11919
11920     self.in_text = serializer.Dump(self.in_data)
11921
11922   def Run(self, name, validate=True, call_fn=None):
11923     """Run an instance allocator and return the results.
11924
11925     """
11926     if call_fn is None:
11927       call_fn = self.rpc.call_iallocator_runner
11928
11929     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
11930     result.Raise("Failure while running the iallocator script")
11931
11932     self.out_text = result.payload
11933     if validate:
11934       self._ValidateResult()
11935
11936   def _ValidateResult(self):
11937     """Process the allocator results.
11938
11939     This will process and if successful save the result in
11940     self.out_data and the other parameters.
11941
11942     """
11943     try:
11944       rdict = serializer.Load(self.out_text)
11945     except Exception, err:
11946       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
11947
11948     if not isinstance(rdict, dict):
11949       raise errors.OpExecError("Can't parse iallocator results: not a dict")
11950
11951     # TODO: remove backwards compatiblity in later versions
11952     if "nodes" in rdict and "result" not in rdict:
11953       rdict["result"] = rdict["nodes"]
11954       del rdict["nodes"]
11955
11956     for key in "success", "info", "result":
11957       if key not in rdict:
11958         raise errors.OpExecError("Can't parse iallocator results:"
11959                                  " missing key '%s'" % key)
11960       setattr(self, key, rdict[key])
11961
11962     if not isinstance(rdict["result"], list):
11963       raise errors.OpExecError("Can't parse iallocator results: 'result' key"
11964                                " is not a list")
11965
11966     if self.mode == constants.IALLOCATOR_MODE_RELOC:
11967       assert self.relocate_from is not None
11968       assert self.required_nodes == 1
11969
11970       node2group = dict((name, ndata["group"])
11971                         for (name, ndata) in self.in_data["nodes"].items())
11972
11973       fn = compat.partial(self._NodesToGroups, node2group,
11974                           self.in_data["nodegroups"])
11975
11976       request_groups = fn(self.relocate_from)
11977       result_groups = fn(rdict["result"])
11978
11979       if result_groups != request_groups:
11980         raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
11981                                  " differ from original groups (%s)" %
11982                                  (utils.CommaJoin(result_groups),
11983                                   utils.CommaJoin(request_groups)))
11984
11985     self.out_data = rdict
11986
11987   @staticmethod
11988   def _NodesToGroups(node2group, groups, nodes):
11989     """Returns a list of unique group names for a list of nodes.
11990
11991     @type node2group: dict
11992     @param node2group: Map from node name to group UUID
11993     @type groups: dict
11994     @param groups: Group information
11995     @type nodes: list
11996     @param nodes: Node names
11997
11998     """
11999     result = set()
12000
12001     for node in nodes:
12002       try:
12003         group_uuid = node2group[node]
12004       except KeyError:
12005         # Ignore unknown node
12006         pass
12007       else:
12008         try:
12009           group = groups[group_uuid]
12010         except KeyError:
12011           # Can't find group, let's use UUID
12012           group_name = group_uuid
12013         else:
12014           group_name = group["name"]
12015
12016         result.add(group_name)
12017
12018     return sorted(result)
12019
12020
class LUTestAllocator(NoHooksLU):
  """Run allocator tests.

  This LU runs the allocator tests: depending on the requested
  direction it returns either the input text built for the allocator or
  the raw output of an allocator run.

  """
  def CheckPrereq(self):
    """Check prerequisites.

    This checks the opcode parameters depending on the direction and
    mode of the test.

    For the allocation mode the instance must not exist yet and the
    "nics" and "disks" parameters must be well-formed; a missing
    hypervisor is replaced by the one returned by
    C{self.cfg.GetHypervisorType()}. For the relocation mode the
    instance name is expanded and its secondary nodes are remembered in
    C{self.relocate_from}.

    @raise errors.OpPrereqError: if a parameter is missing or invalid,
        or if the mode or direction is unknown

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      for attr in ["mem_size", "disks", "disk_template",
                   "os", "tags", "nics", "vcpus"]:
        if not hasattr(self.op, attr):
          raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
                                     attr, errors.ECODE_INVAL)
      # the test allocates a new instance, hence the name must be unused
      iname = self.cfg.ExpandInstanceName(self.op.name)
      if iname is not None:
        raise errors.OpPrereqError("Instance '%s' already in the cluster" %
                                   iname, errors.ECODE_EXISTS)
      if not isinstance(self.op.nics, list):
        raise errors.OpPrereqError("Invalid parameter 'nics'",
                                   errors.ECODE_INVAL)
      if not isinstance(self.op.disks, list):
        raise errors.OpPrereqError("Invalid parameter 'disks'",
                                   errors.ECODE_INVAL)
      # every disk must be a dict with an integer size and a valid mode
      for row in self.op.disks:
        if (not isinstance(row, dict) or
            "size" not in row or
            not isinstance(row["size"], int) or
            "mode" not in row or
            row["mode"] not in ['r', 'w']):
          raise errors.OpPrereqError("Invalid contents of the 'disks'"
                                     " parameter", errors.ECODE_INVAL)
      if self.op.hypervisor is None:
        self.op.hypervisor = self.cfg.GetHypervisorType()
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      fname = _ExpandInstanceName(self.cfg, self.op.name)
      self.op.name = fname
      # kept on the LU, Exec passes it on to the allocator
      self.relocate_from = self.cfg.GetInstanceInfo(fname).secondary_nodes
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      if not hasattr(self.op, "evac_nodes"):
        raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
                                   " opcode input", errors.ECODE_INVAL)
    else:
      raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
                                 self.op.mode, errors.ECODE_INVAL)

    # an allocator name is only needed if the allocator is actually run
    if self.op.direction == constants.IALLOCATOR_DIR_OUT:
      if self.op.allocator is None:
        raise errors.OpPrereqError("Missing allocator name",
                                   errors.ECODE_INVAL)
    elif self.op.direction != constants.IALLOCATOR_DIR_IN:
      raise errors.OpPrereqError("Wrong allocator test '%s'" %
                                 self.op.direction, errors.ECODE_INVAL)

  def Exec(self, feedback_fn):
    """Run the allocator test.

    @param feedback_fn: feedback function, not used by this LU
    @return: for the "in" direction the input text of the allocator,
        otherwise the raw (not validated) output of the allocator run
    @raise errors.ProgrammerError: if the mode is not one of those
        handled here

    """
    if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       mem_size=self.op.mem_size,
                       disks=self.op.disks,
                       disk_template=self.op.disk_template,
                       os=self.op.os,
                       tags=self.op.tags,
                       nics=self.op.nics,
                       vcpus=self.op.vcpus,
                       hypervisor=self.op.hypervisor,
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       name=self.op.name,
                       relocate_from=list(self.relocate_from),
                       )
    elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
      ial = IAllocator(self.cfg, self.rpc,
                       mode=self.op.mode,
                       evac_nodes=self.op.evac_nodes)
    else:
      # the mode must be interpolated into the message; passing it as a
      # separate argument would leave the "%s" placeholder unformatted
      raise errors.ProgrammerError("Uncatched mode %s in"
                                   " LUTestAllocator.Exec" % self.op.mode)

    if self.op.direction == constants.IALLOCATOR_DIR_IN:
      result = ial.in_text
    else:
      # the raw output is wanted, hence no validation
      ial.Run(self.op.allocator, validate=False)
      result = ial.out_text
    return result
12116
12117
#: Maps each query resource type to its implementation
_QUERY_IMPL = {
  constants.QR_OS: _OsQuery,
  constants.QR_GROUP: _GroupQuery,
  constants.QR_NODE: _NodeQuery,
  constants.QR_INSTANCE: _InstanceQuery,
  }

# The implemented resources must be exactly those listed in QR_VIA_OP
assert constants.QR_VIA_OP == set(_QUERY_IMPL)
12127
12128
def _GetQueryImplementation(name):
  """Returns the implementation for a query type.

  @param name: Query type, must be one of L{constants.QR_VIA_OP}
  @return: the entry of L{_QUERY_IMPL} registered for C{name}
  @raise errors.OpPrereqError: if C{name} is not a known query resource

  """
  if name not in _QUERY_IMPL:
    raise errors.OpPrereqError("Unknown query resource '%s'" % name,
                               errors.ECODE_INVAL)

  return _QUERY_IMPL[name]