code.grnet.gr Git - ganeti-local/blob - lib/cmdlib.py

   1 #
   2 #
   3
   4 # Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011 Google Inc.
   5 #
   6 # This program is free software; you can redistribute it and/or modify
   7 # it under the terms of the GNU General Public License as published by
   8 # the Free Software Foundation; either version 2 of the License, or
   9 # (at your option) any later version.
  10 #
  11 # This program is distributed in the hope that it will be useful, but
  12 # WITHOUT ANY WARRANTY; without even the implied warranty of
  13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  14 # General Public License for more details.
  15 #
  16 # You should have received a copy of the GNU General Public License
  17 # along with this program; if not, write to the Free Software
  18 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
  19 # 02110-1301, USA.
  20
  21
  22 """Module implementing the master-side code."""
  23
  24 # pylint: disable-msg=W0201,C0302
  25
  26 # W0201 since most LU attributes are defined in CheckPrereq or similar
  27 # functions
  28
  29 # C0302: since we have waaaay to many lines in this module
  30
  31 import os
  32 import os.path
  33 import time
  34 import re
  35 import platform
  36 import logging
  37 import copy
  38 import OpenSSL
  39 import socket
  40 import tempfile
  41 import shutil
  42 import itertools
  43 import operator
  44
  45 from ganeti import ssh
  46 from ganeti import utils
  47 from ganeti import errors
  48 from ganeti import hypervisor
  49 from ganeti import locking
  50 from ganeti import constants
  51 from ganeti import objects
  52 from ganeti import serializer
  53 from ganeti import ssconf
  54 from ganeti import uidpool
  55 from ganeti import compat
  56 from ganeti import masterd
  57 from ganeti import netutils
  58 from ganeti import query
  59 from ganeti import qlang
  60 from ganeti import opcodes
  61 from ganeti import ht
  62
  63 import ganeti.masterd.instance # pylint: disable-msg=W0611
  64
  65
  66 class ResultWithJobs:
  67   """Data container for LU results with jobs.
  68
  69   Instances of this class returned from L{LogicalUnit.Exec} will be recognized
  70   by L{mcpu.Processor._ProcessResult}. The latter will then submit the jobs
  71   contained in the C{jobs} attribute and include the job IDs in the opcode
  72   result.
  73
  74   """
  75   def __init__(self, jobs, **kwargs):
  76     """Initializes this class.
  77
  78     Additional return values can be specified as keyword arguments.
  79
  80     @type jobs: list of lists of L{opcode.OpCode}
  81     @param jobs: A list of lists of opcode objects
  82
  83     """
  84     self.jobs = jobs
  85     self.other = kwargs
  86
  87
  88 class LogicalUnit(object):
  89   """Logical Unit base class.
  90
  91   Subclasses must follow these rules:
  92     - implement ExpandNames
  93     - implement CheckPrereq (except when tasklets are used)
  94     - implement Exec (except when tasklets are used)
  95     - implement BuildHooksEnv
  96     - implement BuildHooksNodes
  97     - redefine HPATH and HTYPE
  98     - optionally redefine their run requirements:
  99         REQ_BGL: the LU needs to hold the Big Ganeti Lock exclusively
 100
 101   Note that all commands require root permissions.
 102
 103   @ivar dry_run_result: the value (if any) that will be returned to the caller
 104       in dry-run mode (signalled by opcode dry_run parameter)
 105
 106   """
 107   HPATH = None
 108   HTYPE = None
 109   REQ_BGL = True
 110
 111   def __init__(self, processor, op, context, rpc):
 112     """Constructor for LogicalUnit.
 113
 114     This needs to be overridden in derived classes in order to check op
 115     validity.
 116
 117     """
 118     self.proc = processor
 119     self.op = op
 120     self.cfg = context.cfg
 121     self.glm = context.glm
 122     self.context = context
 123     self.rpc = rpc
 124     # Dicts used to declare locking needs to mcpu
 125     self.needed_locks = None
 126     self.share_locks = dict.fromkeys(locking.LEVELS, 0)
 127     self.add_locks = {}
 128     self.remove_locks = {}
 129     # Used to force good behavior when calling helper functions
 130     self.recalculate_locks = {}
 131     # logging
 132     self.Log = processor.Log # pylint: disable-msg=C0103
 133     self.LogWarning = processor.LogWarning # pylint: disable-msg=C0103
 134     self.LogInfo = processor.LogInfo # pylint: disable-msg=C0103
 135     self.LogStep = processor.LogStep # pylint: disable-msg=C0103
 136     # support for dry-run
 137     self.dry_run_result = None
 138     # support for generic debug attribute
 139     if (not hasattr(self.op, "debug_level") or
 140         not isinstance(self.op.debug_level, int)):
 141       self.op.debug_level = 0
 142
 143     # Tasklets
 144     self.tasklets = None
 145
 146     # Validate opcode parameters and set defaults
 147     self.op.Validate(True)
 148
 149     self.CheckArguments()
 150
 151   def CheckArguments(self):
 152     """Check syntactic validity for the opcode arguments.
 153
 154     This method is for doing a simple syntactic check and ensure
 155     validity of opcode parameters, without any cluster-related
 156     checks. While the same can be accomplished in ExpandNames and/or
 157     CheckPrereq, doing these separate is better because:
 158
 159       - ExpandNames is left as as purely a lock-related function
 160       - CheckPrereq is run after we have acquired locks (and possible
 161         waited for them)
 162
 163     The function is allowed to change the self.op attribute so that
 164     later methods can no longer worry about missing parameters.
 165
 166     """
 167     pass
 168
 169   def ExpandNames(self):
 170     """Expand names for this LU.
 171
 172     This method is called before starting to execute the opcode, and it should
 173     update all the parameters of the opcode to their canonical form (e.g. a
 174     short node name must be fully expanded after this method has successfully
 175     completed). This way locking, hooks, logging, etc. can work correctly.
 176
 177     LUs which implement this method must also populate the self.needed_locks
 178     member, as a dict with lock levels as keys, and a list of needed lock names
 179     as values. Rules:
 180
 181       - use an empty dict if you don't need any lock
 182       - if you don't need any lock at a particular level omit that level
 183       - don't put anything for the BGL level
 184       - if you want all locks at a level use locking.ALL_SET as a value
 185
 186     If you need to share locks (rather than acquire them exclusively) at one
 187     level you can modify self.share_locks, setting a true value (usually 1) for
 188     that level. By default locks are not shared.
 189
 190     This function can also define a list of tasklets, which then will be
 191     executed in order instead of the usual LU-level CheckPrereq and Exec
 192     functions, if those are not defined by the LU.
 193
 194     Examples::
 195
 196       # Acquire all nodes and one instance
 197       self.needed_locks = {
 198         locking.LEVEL_NODE: locking.ALL_SET,
 199         locking.LEVEL_INSTANCE: ['instance1.example.com'],
 200       }
 201       # Acquire just two nodes
 202       self.needed_locks = {
 203         locking.LEVEL_NODE: ['node1.example.com', 'node2.example.com'],
 204       }
 205       # Acquire no locks
 206       self.needed_locks = {} # No, you can't leave it to the default value None
 207
 208     """
 209     # The implementation of this method is mandatory only if the new LU is
 210     # concurrent, so that old LUs don't need to be changed all at the same
 211     # time.
 212     if self.REQ_BGL:
 213       self.needed_locks = {} # Exclusive LUs don't need locks.
 214     else:
 215       raise NotImplementedError
 216
 217   def DeclareLocks(self, level):
 218     """Declare LU locking needs for a level
 219
 220     While most LUs can just declare their locking needs at ExpandNames time,
 221     sometimes there's the need to calculate some locks after having acquired
 222     the ones before. This function is called just before acquiring locks at a
 223     particular level, but after acquiring the ones at lower levels, and permits
 224     such calculations. It can be used to modify self.needed_locks, and by
 225     default it does nothing.
 226
 227     This function is only called if you have something already set in
 228     self.needed_locks for the level.
 229
 230     @param level: Locking level which is going to be locked
 231     @type level: member of ganeti.locking.LEVELS
 232
 233     """
 234
 235   def CheckPrereq(self):
 236     """Check prerequisites for this LU.
 237
 238     This method should check that the prerequisites for the execution
 239     of this LU are fulfilled. It can do internode communication, but
 240     it should be idempotent - no cluster or system changes are
 241     allowed.
 242
 243     The method should raise errors.OpPrereqError in case something is
 244     not fulfilled. Its return value is ignored.
 245
 246     This method should also update all the parameters of the opcode to
 247     their canonical form if it hasn't been done by ExpandNames before.
 248
 249     """
 250     if self.tasklets is not None:
 251       for (idx, tl) in enumerate(self.tasklets):
 252         logging.debug("Checking prerequisites for tasklet %s/%s",
 253                       idx + 1, len(self.tasklets))
 254         tl.CheckPrereq()
 255     else:
 256       pass
 257
 258   def Exec(self, feedback_fn):
 259     """Execute the LU.
 260
 261     This method should implement the actual work. It should raise
 262     errors.OpExecError for failures that are somewhat dealt with in
 263     code, or expected.
 264
 265     """
 266     if self.tasklets is not None:
 267       for (idx, tl) in enumerate(self.tasklets):
 268         logging.debug("Executing tasklet %s/%s", idx + 1, len(self.tasklets))
 269         tl.Exec(feedback_fn)
 270     else:
 271       raise NotImplementedError
 272
 273   def BuildHooksEnv(self):
 274     """Build hooks environment for this LU.
 275
 276     @rtype: dict
 277     @return: Dictionary containing the environment that will be used for
 278       running the hooks for this LU. The keys of the dict must not be prefixed
 279       with "GANETI_"--that'll be added by the hooks runner. The hooks runner
 280       will extend the environment with additional variables. If no environment
 281       should be defined, an empty dictionary should be returned (not C{None}).
 282     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 283       will not be called.
 284
 285     """
 286     raise NotImplementedError
 287
 288   def BuildHooksNodes(self):
 289     """Build list of nodes to run LU's hooks.
 290
 291     @rtype: tuple; (list, list)
 292     @return: Tuple containing a list of node names on which the hook
 293       should run before the execution and a list of node names on which the
 294       hook should run after the execution. No nodes should be returned as an
 295       empty list (and not None).
 296     @note: If the C{HPATH} attribute of the LU class is C{None}, this function
 297       will not be called.
 298
 299     """
 300     raise NotImplementedError
 301
 302   def HooksCallBack(self, phase, hook_results, feedback_fn, lu_result):
 303     """Notify the LU about the results of its hooks.
 304
 305     This method is called every time a hooks phase is executed, and notifies
 306     the Logical Unit about the hooks' result. The LU can then use it to alter
 307     its result based on the hooks.  By default the method does nothing and the
 308     previous result is passed back unchanged but any LU can define it if it
 309     wants to use the local cluster hook-scripts somehow.
 310
 311     @param phase: one of L{constants.HOOKS_PHASE_POST} or
 312         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
 313     @param hook_results: the results of the multi-node hooks rpc call
 314     @param feedback_fn: function used send feedback back to the caller
 315     @param lu_result: the previous Exec result this LU had, or None
 316         in the PRE phase
 317     @return: the new Exec result, based on the previous result
 318         and hook results
 319
 320     """
 321     # API must be kept, thus we ignore the unused argument and could
 322     # be a function warnings
 323     # pylint: disable-msg=W0613,R0201
 324     return lu_result
 325
 326   def _ExpandAndLockInstance(self):
 327     """Helper function to expand and lock an instance.
 328
 329     Many LUs that work on an instance take its name in self.op.instance_name
 330     and need to expand it and then declare the expanded name for locking. This
 331     function does it, and then updates self.op.instance_name to the expanded
 332     name. It also initializes needed_locks as a dict, if this hasn't been done
 333     before.
 334
 335     """
 336     if self.needed_locks is None:
 337       self.needed_locks = {}
 338     else:
 339       assert locking.LEVEL_INSTANCE not in self.needed_locks, \
 340         "_ExpandAndLockInstance called with instance-level locks set"
 341     self.op.instance_name = _ExpandInstanceName(self.cfg,
 342                                                 self.op.instance_name)
 343     self.needed_locks[locking.LEVEL_INSTANCE] = self.op.instance_name
 344
 345   def _LockInstancesNodes(self, primary_only=False):
 346     """Helper function to declare instances' nodes for locking.
 347
 348     This function should be called after locking one or more instances to lock
 349     their nodes. Its effect is populating self.needed_locks[locking.LEVEL_NODE]
 350     with all primary or secondary nodes for instances already locked and
 351     present in self.needed_locks[locking.LEVEL_INSTANCE].
 352
 353     It should be called from DeclareLocks, and for safety only works if
 354     self.recalculate_locks[locking.LEVEL_NODE] is set.
 355
 356     In the future it may grow parameters to just lock some instance's nodes, or
 357     to just lock primaries or secondary nodes, if needed.
 358
 359     If should be called in DeclareLocks in a way similar to::
 360
 361       if level == locking.LEVEL_NODE:
 362         self._LockInstancesNodes()
 363
 364     @type primary_only: boolean
 365     @param primary_only: only lock primary nodes of locked instances
 366
 367     """
 368     assert locking.LEVEL_NODE in self.recalculate_locks, \
 369       "_LockInstancesNodes helper function called with no nodes to recalculate"
 370
 371     # TODO: check if we're really been called with the instance locks held
 372
 373     # For now we'll replace self.needed_locks[locking.LEVEL_NODE], but in the
 374     # future we might want to have different behaviors depending on the value
 375     # of self.recalculate_locks[locking.LEVEL_NODE]
 376     wanted_nodes = []
 377     locked_i = self.glm.list_owned(locking.LEVEL_INSTANCE)
 378     for _, instance in self.cfg.GetMultiInstanceInfo(locked_i):
 379       wanted_nodes.append(instance.primary_node)
 380       if not primary_only:
 381         wanted_nodes.extend(instance.secondary_nodes)
 382
 383     if self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_REPLACE:
 384       self.needed_locks[locking.LEVEL_NODE] = wanted_nodes
 385     elif self.recalculate_locks[locking.LEVEL_NODE] == constants.LOCKS_APPEND:
 386       self.needed_locks[locking.LEVEL_NODE].extend(wanted_nodes)
 387
 388     del self.recalculate_locks[locking.LEVEL_NODE]
 389
 390
 391 class NoHooksLU(LogicalUnit): # pylint: disable-msg=W0223
 392   """Simple LU which runs no hooks.
 393
 394   This LU is intended as a parent for other LogicalUnits which will
 395   run no hooks, in order to reduce duplicate code.
 396
 397   """
 398   HPATH = None
 399   HTYPE = None
 400
 401   def BuildHooksEnv(self):
 402     """Empty BuildHooksEnv for NoHooksLu.
 403
 404     This just raises an error.
 405
 406     """
 407     raise AssertionError("BuildHooksEnv called for NoHooksLUs")
 408
 409   def BuildHooksNodes(self):
 410     """Empty BuildHooksNodes for NoHooksLU.
 411
 412     """
 413     raise AssertionError("BuildHooksNodes called for NoHooksLU")
 414
 415
 416 class Tasklet:
 417   """Tasklet base class.
 418
 419   Tasklets are subcomponents for LUs. LUs can consist entirely of tasklets or
 420   they can mix legacy code with tasklets. Locking needs to be done in the LU,
 421   tasklets know nothing about locks.
 422
 423   Subclasses must follow these rules:
 424     - Implement CheckPrereq
 425     - Implement Exec
 426
 427   """
 428   def __init__(self, lu):
 429     self.lu = lu
 430
 431     # Shortcuts
 432     self.cfg = lu.cfg
 433     self.rpc = lu.rpc
 434
 435   def CheckPrereq(self):
 436     """Check prerequisites for this tasklets.
 437
 438     This method should check whether the prerequisites for the execution of
 439     this tasklet are fulfilled. It can do internode communication, but it
 440     should be idempotent - no cluster or system changes are allowed.
 441
 442     The method should raise errors.OpPrereqError in case something is not
 443     fulfilled. Its return value is ignored.
 444
 445     This method should also update all parameters to their canonical form if it
 446     hasn't been done before.
 447
 448     """
 449     pass
 450
 451   def Exec(self, feedback_fn):
 452     """Execute the tasklet.
 453
 454     This method should implement the actual work. It should raise
 455     errors.OpExecError for failures that are somewhat dealt with in code, or
 456     expected.
 457
 458     """
 459     raise NotImplementedError
 460
 461
 462 class _QueryBase:
 463   """Base for query utility classes.
 464
 465   """
 466   #: Attribute holding field definitions
 467   FIELDS = None
 468
 469   def __init__(self, filter_, fields, use_locking):
 470     """Initializes this class.
 471
 472     """
 473     self.use_locking = use_locking
 474
 475     self.query = query.Query(self.FIELDS, fields, filter_=filter_,
 476                              namefield="name")
 477     self.requested_data = self.query.RequestedData()
 478     self.names = self.query.RequestedNames()
 479
 480     # Sort only if no names were requested
 481     self.sort_by_name = not self.names
 482
 483     self.do_locking = None
 484     self.wanted = None
 485
 486   def _GetNames(self, lu, all_names, lock_level):
 487     """Helper function to determine names asked for in the query.
 488
 489     """
 490     if self.do_locking:
 491       names = lu.glm.list_owned(lock_level)
 492     else:
 493       names = all_names
 494
 495     if self.wanted == locking.ALL_SET:
 496       assert not self.names
 497       # caller didn't specify names, so ordering is not important
 498       return utils.NiceSort(names)
 499
 500     # caller specified names and we must keep the same order
 501     assert self.names
 502     assert not self.do_locking or lu.glm.is_owned(lock_level)
 503
 504     missing = set(self.wanted).difference(names)
 505     if missing:
 506       raise errors.OpExecError("Some items were removed before retrieving"
 507                                " their data: %s" % missing)
 508
 509     # Return expanded names
 510     return self.wanted
 511
 512   def ExpandNames(self, lu):
 513     """Expand names for this query.
 514
 515     See L{LogicalUnit.ExpandNames}.
 516
 517     """
 518     raise NotImplementedError()
 519
 520   def DeclareLocks(self, lu, level):
 521     """Declare locks for this query.
 522
 523     See L{LogicalUnit.DeclareLocks}.
 524
 525     """
 526     raise NotImplementedError()
 527
 528   def _GetQueryData(self, lu):
 529     """Collects all data for this query.
 530
 531     @return: Query data object
 532
 533     """
 534     raise NotImplementedError()
 535
 536   def NewStyleQuery(self, lu):
 537     """Collect data and execute query.
 538
 539     """
 540     return query.GetQueryResponse(self.query, self._GetQueryData(lu),
 541                                   sort_by_name=self.sort_by_name)
 542
 543   def OldStyleQuery(self, lu):
 544     """Collect data and execute query.
 545
 546     """
 547     return self.query.OldStyleQuery(self._GetQueryData(lu),
 548                                     sort_by_name=self.sort_by_name)
 549
 550
 551 def _ShareAll():
 552   """Returns a dict declaring all lock levels shared.
 553
 554   """
 555   return dict.fromkeys(locking.LEVELS, 1)
 556
 557
 558 def _SupportsOob(cfg, node):
 559   """Tells if node supports OOB.
 560
 561   @type cfg: L{config.ConfigWriter}
 562   @param cfg: The cluster configuration
 563   @type node: L{objects.Node}
 564   @param node: The node
 565   @return: The OOB script if supported or an empty string otherwise
 566
 567   """
 568   return cfg.GetNdParams(node)[constants.ND_OOB_PROGRAM]
 569
 570
 571 def _GetWantedNodes(lu, nodes):
 572   """Returns list of checked and expanded node names.
 573
 574   @type lu: L{LogicalUnit}
 575   @param lu: the logical unit on whose behalf we execute
 576   @type nodes: list
 577   @param nodes: list of node names or None for all nodes
 578   @rtype: list
 579   @return: the list of nodes, sorted
 580   @raise errors.ProgrammerError: if the nodes parameter is wrong type
 581
 582   """
 583   if nodes:
 584     return [_ExpandNodeName(lu.cfg, name) for name in nodes]
 585
 586   return utils.NiceSort(lu.cfg.GetNodeList())
 587
 588
 589 def _GetWantedInstances(lu, instances):
 590   """Returns list of checked and expanded instance names.
 591
 592   @type lu: L{LogicalUnit}
 593   @param lu: the logical unit on whose behalf we execute
 594   @type instances: list
 595   @param instances: list of instance names or None for all instances
 596   @rtype: list
 597   @return: the list of instances, sorted
 598   @raise errors.OpPrereqError: if the instances parameter is wrong type
 599   @raise errors.OpPrereqError: if any of the passed instances is not found
 600
 601   """
 602   if instances:
 603     wanted = [_ExpandInstanceName(lu.cfg, name) for name in instances]
 604   else:
 605     wanted = utils.NiceSort(lu.cfg.GetInstanceList())
 606   return wanted
 607
 608
 609 def _GetUpdatedParams(old_params, update_dict,
 610                       use_default=True, use_none=False):
 611   """Return the new version of a parameter dictionary.
 612
 613   @type old_params: dict
 614   @param old_params: old parameters
 615   @type update_dict: dict
 616   @param update_dict: dict containing new parameter values, or
 617       constants.VALUE_DEFAULT to reset the parameter to its default
 618       value
 619   @param use_default: boolean
 620   @type use_default: whether to recognise L{constants.VALUE_DEFAULT}
 621       values as 'to be deleted' values
 622   @param use_none: boolean
 623   @type use_none: whether to recognise C{None} values as 'to be
 624       deleted' values
 625   @rtype: dict
 626   @return: the new parameter dictionary
 627
 628   """
 629   params_copy = copy.deepcopy(old_params)
 630   for key, val in update_dict.iteritems():
 631     if ((use_default and val == constants.VALUE_DEFAULT) or
 632         (use_none and val is None)):
 633       try:
 634         del params_copy[key]
 635       except KeyError:
 636         pass
 637     else:
 638       params_copy[key] = val
 639   return params_copy
 640
 641
 642 def _ReleaseLocks(lu, level, names=None, keep=None):
 643   """Releases locks owned by an LU.
 644
 645   @type lu: L{LogicalUnit}
 646   @param level: Lock level
 647   @type names: list or None
 648   @param names: Names of locks to release
 649   @type keep: list or None
 650   @param keep: Names of locks to retain
 651
 652   """
 653   assert not (keep is not None and names is not None), \
 654          "Only one of the 'names' and the 'keep' parameters can be given"
 655
 656   if names is not None:
 657     should_release = names.__contains__
 658   elif keep:
 659     should_release = lambda name: name not in keep
 660   else:
 661     should_release = None
 662
 663   if should_release:
 664     retain = []
 665     release = []
 666
 667     # Determine which locks to release
 668     for name in lu.glm.list_owned(level):
 669       if should_release(name):
 670         release.append(name)
 671       else:
 672         retain.append(name)
 673
 674     assert len(lu.glm.list_owned(level)) == (len(retain) + len(release))
 675
 676     # Release just some locks
 677     lu.glm.release(level, names=release)
 678
 679     assert frozenset(lu.glm.list_owned(level)) == frozenset(retain)
 680   else:
 681     # Release everything
 682     lu.glm.release(level)
 683
 684     assert not lu.glm.is_owned(level), "No locks should be owned"
 685
 686
 687 def _MapInstanceDisksToNodes(instances):
 688   """Creates a map from (node, volume) to instance name.
 689
 690   @type instances: list of L{objects.Instance}
 691   @rtype: dict; tuple of (node name, volume name) as key, instance name as value
 692
 693   """
 694   return dict(((node, vol), inst.name)
 695               for inst in instances
 696               for (node, vols) in inst.MapLVsByNode().items()
 697               for vol in vols)
 698
 699
 700 def _RunPostHook(lu, node_name):
 701   """Runs the post-hook for an opcode on a single node.
 702
 703   """
 704   hm = lu.proc.hmclass(lu.rpc.call_hooks_runner, lu)
 705   try:
 706     hm.RunPhase(constants.HOOKS_PHASE_POST, nodes=[node_name])
 707   except:
 708     # pylint: disable-msg=W0702
 709     lu.LogWarning("Errors occurred running hooks on %s" % node_name)
 710
 711
 712 def _CheckOutputFields(static, dynamic, selected):
 713   """Checks whether all selected fields are valid.
 714
 715   @type static: L{utils.FieldSet}
 716   @param static: static fields set
 717   @type dynamic: L{utils.FieldSet}
 718   @param dynamic: dynamic fields set
 719
 720   """
 721   f = utils.FieldSet()
 722   f.Extend(static)
 723   f.Extend(dynamic)
 724
 725   delta = f.NonMatching(selected)
 726   if delta:
 727     raise errors.OpPrereqError("Unknown output fields selected: %s"
 728                                % ",".join(delta), errors.ECODE_INVAL)
 729
 730
 731 def _CheckGlobalHvParams(params):
 732   """Validates that given hypervisor params are not global ones.
 733
 734   This will ensure that instances don't get customised versions of
 735   global params.
 736
 737   """
 738   used_globals = constants.HVC_GLOBALS.intersection(params)
 739   if used_globals:
 740     msg = ("The following hypervisor parameters are global and cannot"
 741            " be customized at instance level, please modify them at"
 742            " cluster level: %s" % utils.CommaJoin(used_globals))
 743     raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
 744
 745
 746 def _CheckNodeOnline(lu, node, msg=None):
 747   """Ensure that a given node is online.
 748
 749   @param lu: the LU on behalf of which we make the check
 750   @param node: the node to check
 751   @param msg: if passed, should be a message to replace the default one
 752   @raise errors.OpPrereqError: if the node is offline
 753
 754   """
 755   if msg is None:
 756     msg = "Can't use offline node"
 757   if lu.cfg.GetNodeInfo(node).offline:
 758     raise errors.OpPrereqError("%s: %s" % (msg, node), errors.ECODE_STATE)
 759
 760
 761 def _CheckNodeNotDrained(lu, node):
 762   """Ensure that a given node is not drained.
 763
 764   @param lu: the LU on behalf of which we make the check
 765   @param node: the node to check
 766   @raise errors.OpPrereqError: if the node is drained
 767
 768   """
 769   if lu.cfg.GetNodeInfo(node).drained:
 770     raise errors.OpPrereqError("Can't use drained node %s" % node,
 771                                errors.ECODE_STATE)
 772
 773
 774 def _CheckNodeVmCapable(lu, node):
 775   """Ensure that a given node is vm capable.
 776
 777   @param lu: the LU on behalf of which we make the check
 778   @param node: the node to check
 779   @raise errors.OpPrereqError: if the node is not vm capable
 780
 781   """
 782   if not lu.cfg.GetNodeInfo(node).vm_capable:
 783     raise errors.OpPrereqError("Can't use non-vm_capable node %s" % node,
 784                                errors.ECODE_STATE)
 785
 786
 787 def _CheckNodeHasOS(lu, node, os_name, force_variant):
 788   """Ensure that a node supports a given OS.
 789
 790   @param lu: the LU on behalf of which we make the check
 791   @param node: the node to check
 792   @param os_name: the OS to query about
 793   @param force_variant: whether to ignore variant errors
 794   @raise errors.OpPrereqError: if the node is not supporting the OS
 795
 796   """
 797   result = lu.rpc.call_os_get(node, os_name)
 798   result.Raise("OS '%s' not in supported OS list for node %s" %
 799                (os_name, node),
 800                prereq=True, ecode=errors.ECODE_INVAL)
 801   if not force_variant:
 802     _CheckOSVariant(result.payload, os_name)
 803
 804
 805 def _CheckNodeHasSecondaryIP(lu, node, secondary_ip, prereq):
 806   """Ensure that a node has the given secondary ip.
 807
 808   @type lu: L{LogicalUnit}
 809   @param lu: the LU on behalf of which we make the check
 810   @type node: string
 811   @param node: the node to check
 812   @type secondary_ip: string
 813   @param secondary_ip: the ip to check
 814   @type prereq: boolean
 815   @param prereq: whether to throw a prerequisite or an execute error
 816   @raise errors.OpPrereqError: if the node doesn't have the ip, and prereq=True
 817   @raise errors.OpExecError: if the node doesn't have the ip, and prereq=False
 818
 819   """
 820   result = lu.rpc.call_node_has_ip_address(node, secondary_ip)
 821   result.Raise("Failure checking secondary ip on node %s" % node,
 822                prereq=prereq, ecode=errors.ECODE_ENVIRON)
 823   if not result.payload:
 824     msg = ("Node claims it doesn't have the secondary ip you gave (%s),"
 825            " please fix and re-run this command" % secondary_ip)
 826     if prereq:
 827       raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
 828     else:
 829       raise errors.OpExecError(msg)
 830
 831
 832 def _GetClusterDomainSecret():
 833   """Reads the cluster domain secret.
 834
 835   """
 836   return utils.ReadOneLineFile(constants.CLUSTER_DOMAIN_SECRET_FILE,
 837                                strict=True)
 838
 839
 840 def _CheckInstanceDown(lu, instance, reason):
 841   """Ensure that an instance is not running."""
 842   if instance.admin_up:
 843     raise errors.OpPrereqError("Instance %s is marked to be up, %s" %
 844                                (instance.name, reason), errors.ECODE_STATE)
 845
 846   pnode = instance.primary_node
 847   ins_l = lu.rpc.call_instance_list([pnode], [instance.hypervisor])[pnode]
 848   ins_l.Raise("Can't contact node %s for instance information" % pnode,
 849               prereq=True, ecode=errors.ECODE_ENVIRON)
 850
 851   if instance.name in ins_l.payload:
 852     raise errors.OpPrereqError("Instance %s is running, %s" %
 853                                (instance.name, reason), errors.ECODE_STATE)
 854
 855
 856 def _ExpandItemName(fn, name, kind):
 857   """Expand an item name.
 858
 859   @param fn: the function to use for expansion
 860   @param name: requested item name
 861   @param kind: text description ('Node' or 'Instance')
 862   @return: the resolved (full) name
 863   @raise errors.OpPrereqError: if the item is not found
 864
 865   """
 866   full_name = fn(name)
 867   if full_name is None:
 868     raise errors.OpPrereqError("%s '%s' not known" % (kind, name),
 869                                errors.ECODE_NOENT)
 870   return full_name
 871
 872
 873 def _ExpandNodeName(cfg, name):
 874   """Wrapper over L{_ExpandItemName} for nodes."""
 875   return _ExpandItemName(cfg.ExpandNodeName, name, "Node")
 876
 877
 878 def _ExpandInstanceName(cfg, name):
 879   """Wrapper over L{_ExpandItemName} for instance."""
 880   return _ExpandItemName(cfg.ExpandInstanceName, name, "Instance")
 881
 882
 883 def _BuildInstanceHookEnv(name, primary_node, secondary_nodes, os_type, status,
 884                           memory, vcpus, nics, disk_template, disks,
 885                           bep, hvp, hypervisor_name, tags):
 886   """Builds instance related env variables for hooks
 887
 888   This builds the hook environment from individual variables.
 889
 890   @type name: string
 891   @param name: the name of the instance
 892   @type primary_node: string
 893   @param primary_node: the name of the instance's primary node
 894   @type secondary_nodes: list
 895   @param secondary_nodes: list of secondary nodes as strings
 896   @type os_type: string
 897   @param os_type: the name of the instance's OS
 898   @type status: boolean
 899   @param status: the should_run status of the instance
 900   @type memory: string
 901   @param memory: the memory size of the instance
 902   @type vcpus: string
 903   @param vcpus: the count of VCPUs the instance has
 904   @type nics: list
 905   @param nics: list of tuples (ip, mac, mode, link) representing
 906       the NICs the instance has
 907   @type disk_template: string
 908   @param disk_template: the disk template of the instance
 909   @type disks: list
 910   @param disks: the list of (size, mode) pairs
 911   @type bep: dict
 912   @param bep: the backend parameters for the instance
 913   @type hvp: dict
 914   @param hvp: the hypervisor parameters for the instance
 915   @type hypervisor_name: string
 916   @param hypervisor_name: the hypervisor for the instance
 917   @type tags: list
 918   @param tags: list of instance tags as strings
 919   @rtype: dict
 920   @return: the hook environment for this instance
 921
 922   """
 923   if status:
 924     str_status = "up"
 925   else:
 926     str_status = "down"
 927   env = {
 928     "OP_TARGET": name,
 929     "INSTANCE_NAME": name,
 930     "INSTANCE_PRIMARY": primary_node,
 931     "INSTANCE_SECONDARIES": " ".join(secondary_nodes),
 932     "INSTANCE_OS_TYPE": os_type,
 933     "INSTANCE_STATUS": str_status,
 934     "INSTANCE_MEMORY": memory,
 935     "INSTANCE_VCPUS": vcpus,
 936     "INSTANCE_DISK_TEMPLATE": disk_template,
 937     "INSTANCE_HYPERVISOR": hypervisor_name,
 938   }
 939
 940   if nics:
 941     nic_count = len(nics)
 942     for idx, (ip, mac, mode, link) in enumerate(nics):
 943       if ip is None:
 944         ip = ""
 945       env["INSTANCE_NIC%d_IP" % idx] = ip
 946       env["INSTANCE_NIC%d_MAC" % idx] = mac
 947       env["INSTANCE_NIC%d_MODE" % idx] = mode
 948       env["INSTANCE_NIC%d_LINK" % idx] = link
 949       if mode == constants.NIC_MODE_BRIDGED:
 950         env["INSTANCE_NIC%d_BRIDGE" % idx] = link
 951   else:
 952     nic_count = 0
 953
 954   env["INSTANCE_NIC_COUNT"] = nic_count
 955
 956   if disks:
 957     disk_count = len(disks)
 958     for idx, (size, mode) in enumerate(disks):
 959       env["INSTANCE_DISK%d_SIZE" % idx] = size
 960       env["INSTANCE_DISK%d_MODE" % idx] = mode
 961   else:
 962     disk_count = 0
 963
 964   env["INSTANCE_DISK_COUNT"] = disk_count
 965
 966   if not tags:
 967     tags = []
 968
 969   env["INSTANCE_TAGS"] = " ".join(tags)
 970
 971   for source, kind in [(bep, "BE"), (hvp, "HV")]:
 972     for key, value in source.items():
 973       env["INSTANCE_%s_%s" % (kind, key)] = value
 974
 975   return env
 976
 977
 978 def _NICListToTuple(lu, nics):
 979   """Build a list of nic information tuples.
 980
 981   This list is suitable to be passed to _BuildInstanceHookEnv or as a return
 982   value in LUInstanceQueryData.
 983
 984   @type lu:  L{LogicalUnit}
 985   @param lu: the logical unit on whose behalf we execute
 986   @type nics: list of L{objects.NIC}
 987   @param nics: list of nics to convert to hooks tuples
 988
 989   """
 990   hooks_nics = []
 991   cluster = lu.cfg.GetClusterInfo()
 992   for nic in nics:
 993     ip = nic.ip
 994     mac = nic.mac
 995     filled_params = cluster.SimpleFillNIC(nic.nicparams)
 996     mode = filled_params[constants.NIC_MODE]
 997     link = filled_params[constants.NIC_LINK]
 998     hooks_nics.append((ip, mac, mode, link))
 999   return hooks_nics
1000
1001
1002 def _BuildInstanceHookEnvByObject(lu, instance, override=None):
1003   """Builds instance related env variables for hooks from an object.
1004
1005   @type lu: L{LogicalUnit}
1006   @param lu: the logical unit on whose behalf we execute
1007   @type instance: L{objects.Instance}
1008   @param instance: the instance for which we should build the
1009       environment
1010   @type override: dict
1011   @param override: dictionary with key/values that will override
1012       our values
1013   @rtype: dict
1014   @return: the hook environment dictionary
1015
1016   """
1017   cluster = lu.cfg.GetClusterInfo()
1018   bep = cluster.FillBE(instance)
1019   hvp = cluster.FillHV(instance)
1020   args = {
1021     "name": instance.name,
1022     "primary_node": instance.primary_node,
1023     "secondary_nodes": instance.secondary_nodes,
1024     "os_type": instance.os,
1025     "status": instance.admin_up,
1026     "memory": bep[constants.BE_MEMORY],
1027     "vcpus": bep[constants.BE_VCPUS],
1028     "nics": _NICListToTuple(lu, instance.nics),
1029     "disk_template": instance.disk_template,
1030     "disks": [(disk.size, disk.mode) for disk in instance.disks],
1031     "bep": bep,
1032     "hvp": hvp,
1033     "hypervisor_name": instance.hypervisor,
1034     "tags": instance.tags,
1035   }
1036   if override:
1037     args.update(override)
1038   return _BuildInstanceHookEnv(**args) # pylint: disable-msg=W0142
1039
1040
1041 def _AdjustCandidatePool(lu, exceptions):
1042   """Adjust the candidate pool after node operations.
1043
1044   """
1045   mod_list = lu.cfg.MaintainCandidatePool(exceptions)
1046   if mod_list:
1047     lu.LogInfo("Promoted nodes to master candidate role: %s",
1048                utils.CommaJoin(node.name for node in mod_list))
1049     for name in mod_list:
1050       lu.context.ReaddNode(name)
1051   mc_now, mc_max, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1052   if mc_now > mc_max:
1053     lu.LogInfo("Note: more nodes are candidates (%d) than desired (%d)" %
1054                (mc_now, mc_max))
1055
1056
1057 def _DecideSelfPromotion(lu, exceptions=None):
1058   """Decide whether I should promote myself as a master candidate.
1059
1060   """
1061   cp_size = lu.cfg.GetClusterInfo().candidate_pool_size
1062   mc_now, mc_should, _ = lu.cfg.GetMasterCandidateStats(exceptions)
1063   # the new node will increase mc_max with one, so:
1064   mc_should = min(mc_should + 1, cp_size)
1065   return mc_now < mc_should
1066
1067
1068 def _CheckNicsBridgesExist(lu, target_nics, target_node):
1069   """Check that the brigdes needed by a list of nics exist.
1070
1071   """
1072   cluster = lu.cfg.GetClusterInfo()
1073   paramslist = [cluster.SimpleFillNIC(nic.nicparams) for nic in target_nics]
1074   brlist = [params[constants.NIC_LINK] for params in paramslist
1075             if params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED]
1076   if brlist:
1077     result = lu.rpc.call_bridges_exist(target_node, brlist)
1078     result.Raise("Error checking bridges on destination node '%s'" %
1079                  target_node, prereq=True, ecode=errors.ECODE_ENVIRON)
1080
1081
1082 def _CheckInstanceBridgesExist(lu, instance, node=None):
1083   """Check that the brigdes needed by an instance exist.
1084
1085   """
1086   if node is None:
1087     node = instance.primary_node
1088   _CheckNicsBridgesExist(lu, instance.nics, node)
1089
1090
1091 def _CheckOSVariant(os_obj, name):
1092   """Check whether an OS name conforms to the os variants specification.
1093
1094   @type os_obj: L{objects.OS}
1095   @param os_obj: OS object to check
1096   @type name: string
1097   @param name: OS name passed by the user, to check for validity
1098
1099   """
1100   variant = objects.OS.GetVariant(name)
1101   if not os_obj.supported_variants:
1102     if variant:
1103       raise errors.OpPrereqError("OS '%s' doesn't support variants ('%s'"
1104                                  " passed)" % (os_obj.name, variant),
1105                                  errors.ECODE_INVAL)
1106     return
1107   if not variant:
1108     raise errors.OpPrereqError("OS name must include a variant",
1109                                errors.ECODE_INVAL)
1110
1111   if variant not in os_obj.supported_variants:
1112     raise errors.OpPrereqError("Unsupported OS variant", errors.ECODE_INVAL)
1113
1114
1115 def _GetNodeInstancesInner(cfg, fn):
1116   return [i for i in cfg.GetAllInstancesInfo().values() if fn(i)]
1117
1118
1119 def _GetNodeInstances(cfg, node_name):
1120   """Returns a list of all primary and secondary instances on a node.
1121
1122   """
1123
1124   return _GetNodeInstancesInner(cfg, lambda inst: node_name in inst.all_nodes)
1125
1126
1127 def _GetNodePrimaryInstances(cfg, node_name):
1128   """Returns primary instances on a node.
1129
1130   """
1131   return _GetNodeInstancesInner(cfg,
1132                                 lambda inst: node_name == inst.primary_node)
1133
1134
1135 def _GetNodeSecondaryInstances(cfg, node_name):
1136   """Returns secondary instances on a node.
1137
1138   """
1139   return _GetNodeInstancesInner(cfg,
1140                                 lambda inst: node_name in inst.secondary_nodes)
1141
1142
1143 def _GetStorageTypeArgs(cfg, storage_type):
1144   """Returns the arguments for a storage type.
1145
1146   """
1147   # Special case for file storage
1148   if storage_type == constants.ST_FILE:
1149     # storage.FileStorage wants a list of storage directories
1150     return [[cfg.GetFileStorageDir(), cfg.GetSharedFileStorageDir()]]
1151
1152   return []
1153
1154
1155 def _FindFaultyInstanceDisks(cfg, rpc, instance, node_name, prereq):
1156   faulty = []
1157
1158   for dev in instance.disks:
1159     cfg.SetDiskID(dev, node_name)
1160
1161   result = rpc.call_blockdev_getmirrorstatus(node_name, instance.disks)
1162   result.Raise("Failed to get disk status from node %s" % node_name,
1163                prereq=prereq, ecode=errors.ECODE_ENVIRON)
1164
1165   for idx, bdev_status in enumerate(result.payload):
1166     if bdev_status and bdev_status.ldisk_status == constants.LDS_FAULTY:
1167       faulty.append(idx)
1168
1169   return faulty
1170
1171
1172 def _CheckIAllocatorOrNode(lu, iallocator_slot, node_slot):
1173   """Check the sanity of iallocator and node arguments and use the
1174   cluster-wide iallocator if appropriate.
1175
1176   Check that at most one of (iallocator, node) is specified. If none is
1177   specified, then the LU's opcode's iallocator slot is filled with the
1178   cluster-wide default iallocator.
1179
1180   @type iallocator_slot: string
1181   @param iallocator_slot: the name of the opcode iallocator slot
1182   @type node_slot: string
1183   @param node_slot: the name of the opcode target node slot
1184
1185   """
1186   node = getattr(lu.op, node_slot, None)
1187   iallocator = getattr(lu.op, iallocator_slot, None)
1188
1189   if node is not None and iallocator is not None:
1190     raise errors.OpPrereqError("Do not specify both, iallocator and node",
1191                                errors.ECODE_INVAL)
1192   elif node is None and iallocator is None:
1193     default_iallocator = lu.cfg.GetDefaultIAllocator()
1194     if default_iallocator:
1195       setattr(lu.op, iallocator_slot, default_iallocator)
1196     else:
1197       raise errors.OpPrereqError("No iallocator or node given and no"
1198                                  " cluster-wide default iallocator found;"
1199                                  " please specify either an iallocator or a"
1200                                  " node, or set a cluster-wide default"
1201                                  " iallocator")
1202
1203
1204 def _GetDefaultIAllocator(cfg, iallocator):
1205   """Decides on which iallocator to use.
1206
1207   @type cfg: L{config.ConfigWriter}
1208   @param cfg: Cluster configuration object
1209   @type iallocator: string or None
1210   @param iallocator: Iallocator specified in opcode
1211   @rtype: string
1212   @return: Iallocator name
1213
1214   """
1215   if not iallocator:
1216     # Use default iallocator
1217     iallocator = cfg.GetDefaultIAllocator()
1218
1219   if not iallocator:
1220     raise errors.OpPrereqError("No iallocator was specified, neither in the"
1221                                " opcode nor as a cluster-wide default",
1222                                errors.ECODE_INVAL)
1223
1224   return iallocator
1225
1226
1227 class LUClusterPostInit(LogicalUnit):
1228   """Logical unit for running hooks after cluster initialization.
1229
1230   """
1231   HPATH = "cluster-init"
1232   HTYPE = constants.HTYPE_CLUSTER
1233
1234   def BuildHooksEnv(self):
1235     """Build hooks env.
1236
1237     """
1238     return {
1239       "OP_TARGET": self.cfg.GetClusterName(),
1240       }
1241
1242   def BuildHooksNodes(self):
1243     """Build hooks nodes.
1244
1245     """
1246     return ([], [self.cfg.GetMasterNode()])
1247
1248   def Exec(self, feedback_fn):
1249     """Nothing to do.
1250
1251     """
1252     return True
1253
1254
1255 class LUClusterDestroy(LogicalUnit):
1256   """Logical unit for destroying the cluster.
1257
1258   """
1259   HPATH = "cluster-destroy"
1260   HTYPE = constants.HTYPE_CLUSTER
1261
1262   def BuildHooksEnv(self):
1263     """Build hooks env.
1264
1265     """
1266     return {
1267       "OP_TARGET": self.cfg.GetClusterName(),
1268       }
1269
1270   def BuildHooksNodes(self):
1271     """Build hooks nodes.
1272
1273     """
1274     return ([], [])
1275
1276   def CheckPrereq(self):
1277     """Check prerequisites.
1278
1279     This checks whether the cluster is empty.
1280
1281     Any errors are signaled by raising errors.OpPrereqError.
1282
1283     """
1284     master = self.cfg.GetMasterNode()
1285
1286     nodelist = self.cfg.GetNodeList()
1287     if len(nodelist) != 1 or nodelist[0] != master:
1288       raise errors.OpPrereqError("There are still %d node(s) in"
1289                                  " this cluster." % (len(nodelist) - 1),
1290                                  errors.ECODE_INVAL)
1291     instancelist = self.cfg.GetInstanceList()
1292     if instancelist:
1293       raise errors.OpPrereqError("There are still %d instance(s) in"
1294                                  " this cluster." % len(instancelist),
1295                                  errors.ECODE_INVAL)
1296
1297   def Exec(self, feedback_fn):
1298     """Destroys the cluster.
1299
1300     """
1301     master = self.cfg.GetMasterNode()
1302
1303     # Run post hooks on master node before it's removed
1304     _RunPostHook(self, master)
1305
1306     result = self.rpc.call_node_stop_master(master, False)
1307     result.Raise("Could not disable the master role")
1308
1309     return master
1310
1311
1312 def _VerifyCertificate(filename):
1313   """Verifies a certificate for L{LUClusterVerifyConfig}.
1314
1315   @type filename: string
1316   @param filename: Path to PEM file
1317
1318   """
1319   try:
1320     cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
1321                                            utils.ReadFile(filename))
1322   except Exception, err: # pylint: disable-msg=W0703
1323     return (LUClusterVerifyConfig.ETYPE_ERROR,
1324             "Failed to load X509 certificate %s: %s" % (filename, err))
1325
1326   (errcode, msg) = \
1327     utils.VerifyX509Certificate(cert, constants.SSL_CERT_EXPIRATION_WARN,
1328                                 constants.SSL_CERT_EXPIRATION_ERROR)
1329
1330   if msg:
1331     fnamemsg = "While verifying %s: %s" % (filename, msg)
1332   else:
1333     fnamemsg = None
1334
1335   if errcode is None:
1336     return (None, fnamemsg)
1337   elif errcode == utils.CERT_WARNING:
1338     return (LUClusterVerifyConfig.ETYPE_WARNING, fnamemsg)
1339   elif errcode == utils.CERT_ERROR:
1340     return (LUClusterVerifyConfig.ETYPE_ERROR, fnamemsg)
1341
1342   raise errors.ProgrammerError("Unhandled certificate error code %r" % errcode)
1343
1344
1345 def _GetAllHypervisorParameters(cluster, instances):
1346   """Compute the set of all hypervisor parameters.
1347
1348   @type cluster: L{objects.Cluster}
1349   @param cluster: the cluster object
1350   @param instances: list of L{objects.Instance}
1351   @param instances: additional instances from which to obtain parameters
1352   @rtype: list of (origin, hypervisor, parameters)
1353   @return: a list with all parameters found, indicating the hypervisor they
1354        apply to, and the origin (can be "cluster", "os X", or "instance Y")
1355
1356   """
1357   hvp_data = []
1358
1359   for hv_name in cluster.enabled_hypervisors:
1360     hvp_data.append(("cluster", hv_name, cluster.GetHVDefaults(hv_name)))
1361
1362   for os_name, os_hvp in cluster.os_hvp.items():
1363     for hv_name, hv_params in os_hvp.items():
1364       if hv_params:
1365         full_params = cluster.GetHVDefaults(hv_name, os_name=os_name)
1366         hvp_data.append(("os %s" % os_name, hv_name, full_params))
1367
1368   # TODO: collapse identical parameter values in a single one
1369   for instance in instances:
1370     if instance.hvparams:
1371       hvp_data.append(("instance %s" % instance.name, instance.hypervisor,
1372                        cluster.FillHV(instance)))
1373
1374   return hvp_data
1375
1376
1377 class _VerifyErrors(object):
1378   """Mix-in for cluster/group verify LUs.
1379
1380   It provides _Error and _ErrorIf, and updates the self.bad boolean. (Expects
1381   self.op and self._feedback_fn to be available.)
1382
1383   """
1384   TCLUSTER = "cluster"
1385   TNODE = "node"
1386   TINSTANCE = "instance"
1387
1388   ECLUSTERCFG = (TCLUSTER, "ECLUSTERCFG")
1389   ECLUSTERCERT = (TCLUSTER, "ECLUSTERCERT")
1390   ECLUSTERFILECHECK = (TCLUSTER, "ECLUSTERFILECHECK")
1391   ECLUSTERDANGLINGNODES = (TNODE, "ECLUSTERDANGLINGNODES")
1392   ECLUSTERDANGLINGINST = (TNODE, "ECLUSTERDANGLINGINST")
1393   EINSTANCEBADNODE = (TINSTANCE, "EINSTANCEBADNODE")
1394   EINSTANCEDOWN = (TINSTANCE, "EINSTANCEDOWN")
1395   EINSTANCELAYOUT = (TINSTANCE, "EINSTANCELAYOUT")
1396   EINSTANCEMISSINGDISK = (TINSTANCE, "EINSTANCEMISSINGDISK")
1397   EINSTANCEFAULTYDISK = (TINSTANCE, "EINSTANCEFAULTYDISK")
1398   EINSTANCEWRONGNODE = (TINSTANCE, "EINSTANCEWRONGNODE")
1399   EINSTANCESPLITGROUPS = (TINSTANCE, "EINSTANCESPLITGROUPS")
1400   ENODEDRBD = (TNODE, "ENODEDRBD")
1401   ENODEDRBDHELPER = (TNODE, "ENODEDRBDHELPER")
1402   ENODEFILECHECK = (TNODE, "ENODEFILECHECK")
1403   ENODEHOOKS = (TNODE, "ENODEHOOKS")
1404   ENODEHV = (TNODE, "ENODEHV")
1405   ENODELVM = (TNODE, "ENODELVM")
1406   ENODEN1 = (TNODE, "ENODEN1")
1407   ENODENET = (TNODE, "ENODENET")
1408   ENODEOS = (TNODE, "ENODEOS")
1409   ENODEORPHANINSTANCE = (TNODE, "ENODEORPHANINSTANCE")
1410   ENODEORPHANLV = (TNODE, "ENODEORPHANLV")
1411   ENODERPC = (TNODE, "ENODERPC")
1412   ENODESSH = (TNODE, "ENODESSH")
1413   ENODEVERSION = (TNODE, "ENODEVERSION")
1414   ENODESETUP = (TNODE, "ENODESETUP")
1415   ENODETIME = (TNODE, "ENODETIME")
1416   ENODEOOBPATH = (TNODE, "ENODEOOBPATH")
1417
1418   ETYPE_FIELD = "code"
1419   ETYPE_ERROR = "ERROR"
1420   ETYPE_WARNING = "WARNING"
1421
1422   def _Error(self, ecode, item, msg, *args, **kwargs):
1423     """Format an error message.
1424
1425     Based on the opcode's error_codes parameter, either format a
1426     parseable error code, or a simpler error string.
1427
1428     This must be called only from Exec and functions called from Exec.
1429
1430     """
1431     ltype = kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR)
1432     itype, etxt = ecode
1433     # first complete the msg
1434     if args:
1435       msg = msg % args
1436     # then format the whole message
1437     if self.op.error_codes: # This is a mix-in. pylint: disable-msg=E1101
1438       msg = "%s:%s:%s:%s:%s" % (ltype, etxt, itype, item, msg)
1439     else:
1440       if item:
1441         item = " " + item
1442       else:
1443         item = ""
1444       msg = "%s: %s%s: %s" % (ltype, itype, item, msg)
1445     # and finally report it via the feedback_fn
1446     self._feedback_fn("  - %s" % msg) # Mix-in. pylint: disable-msg=E1101
1447
1448   def _ErrorIf(self, cond, *args, **kwargs):
1449     """Log an error message if the passed condition is True.
1450
1451     """
1452     cond = (bool(cond)
1453             or self.op.debug_simulate_errors) # pylint: disable-msg=E1101
1454     if cond:
1455       self._Error(*args, **kwargs)
1456     # do not mark the operation as failed for WARN cases only
1457     if kwargs.get(self.ETYPE_FIELD, self.ETYPE_ERROR) == self.ETYPE_ERROR:
1458       self.bad = self.bad or cond
1459
1460
1461 class LUClusterVerifyConfig(NoHooksLU, _VerifyErrors):
1462   """Verifies the cluster config.
1463
1464   """
1465   REQ_BGL = True
1466
1467   def _VerifyHVP(self, hvp_data):
1468     """Verifies locally the syntax of the hypervisor parameters.
1469
1470     """
1471     for item, hv_name, hv_params in hvp_data:
1472       msg = ("hypervisor %s parameters syntax check (source %s): %%s" %
1473              (item, hv_name))
1474       try:
1475         hv_class = hypervisor.GetHypervisor(hv_name)
1476         utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
1477         hv_class.CheckParameterSyntax(hv_params)
1478       except errors.GenericError, err:
1479         self._ErrorIf(True, self.ECLUSTERCFG, None, msg % str(err))
1480
1481   def ExpandNames(self):
1482     # Information can be safely retrieved as the BGL is acquired in exclusive
1483     # mode
1484     self.all_group_info = self.cfg.GetAllNodeGroupsInfo()
1485     self.all_node_info = self.cfg.GetAllNodesInfo()
1486     self.all_inst_info = self.cfg.GetAllInstancesInfo()
1487     self.needed_locks = {}
1488
1489   def Exec(self, feedback_fn):
1490     """Verify integrity of cluster, performing various test on nodes.
1491
1492     """
1493     self.bad = False
1494     self._feedback_fn = feedback_fn
1495
1496     feedback_fn("* Verifying cluster config")
1497
1498     for msg in self.cfg.VerifyConfig():
1499       self._ErrorIf(True, self.ECLUSTERCFG, None, msg)
1500
1501     feedback_fn("* Verifying cluster certificate files")
1502
1503     for cert_filename in constants.ALL_CERT_FILES:
1504       (errcode, msg) = _VerifyCertificate(cert_filename)
1505       self._ErrorIf(errcode, self.ECLUSTERCERT, None, msg, code=errcode)
1506
1507     feedback_fn("* Verifying hypervisor parameters")
1508
1509     self._VerifyHVP(_GetAllHypervisorParameters(self.cfg.GetClusterInfo(),
1510                                                 self.all_inst_info.values()))
1511
1512     feedback_fn("* Verifying all nodes belong to an existing group")
1513
1514     # We do this verification here because, should this bogus circumstance
1515     # occur, it would never be caught by VerifyGroup, which only acts on
1516     # nodes/instances reachable from existing node groups.
1517
1518     dangling_nodes = set(node.name for node in self.all_node_info.values()
1519                          if node.group not in self.all_group_info)
1520
1521     dangling_instances = {}
1522     no_node_instances = []
1523
1524     for inst in self.all_inst_info.values():
1525       if inst.primary_node in dangling_nodes:
1526         dangling_instances.setdefault(inst.primary_node, []).append(inst.name)
1527       elif inst.primary_node not in self.all_node_info:
1528         no_node_instances.append(inst.name)
1529
1530     pretty_dangling = [
1531         "%s (%s)" %
1532         (node.name,
1533          utils.CommaJoin(dangling_instances.get(node.name,
1534                                                 ["no instances"])))
1535         for node in dangling_nodes]
1536
1537     self._ErrorIf(bool(dangling_nodes), self.ECLUSTERDANGLINGNODES, None,
1538                   "the following nodes (and their instances) belong to a non"
1539                   " existing group: %s", utils.CommaJoin(pretty_dangling))
1540
1541     self._ErrorIf(bool(no_node_instances), self.ECLUSTERDANGLINGINST, None,
1542                   "the following instances have a non-existing primary-node:"
1543                   " %s", utils.CommaJoin(no_node_instances))
1544
1545     return (not self.bad, [g.name for g in self.all_group_info.values()])
1546
1547
1548 class LUClusterVerifyGroup(LogicalUnit, _VerifyErrors):
1549   """Verifies the status of a node group.
1550
1551   """
1552   HPATH = "cluster-verify"
1553   HTYPE = constants.HTYPE_CLUSTER
1554   REQ_BGL = False
1555
1556   _HOOKS_INDENT_RE = re.compile("^", re.M)
1557
1558   class NodeImage(object):
1559     """A class representing the logical and physical status of a node.
1560
1561     @type name: string
1562     @ivar name: the node name to which this object refers
1563     @ivar volumes: a structure as returned from
1564         L{ganeti.backend.GetVolumeList} (runtime)
1565     @ivar instances: a list of running instances (runtime)
1566     @ivar pinst: list of configured primary instances (config)
1567     @ivar sinst: list of configured secondary instances (config)
1568     @ivar sbp: dictionary of {primary-node: list of instances} for all
1569         instances for which this node is secondary (config)
1570     @ivar mfree: free memory, as reported by hypervisor (runtime)
1571     @ivar dfree: free disk, as reported by the node (runtime)
1572     @ivar offline: the offline status (config)
1573     @type rpc_fail: boolean
1574     @ivar rpc_fail: whether the RPC verify call was successfull (overall,
1575         not whether the individual keys were correct) (runtime)
1576     @type lvm_fail: boolean
1577     @ivar lvm_fail: whether the RPC call didn't return valid LVM data
1578     @type hyp_fail: boolean
1579     @ivar hyp_fail: whether the RPC call didn't return the instance list
1580     @type ghost: boolean
1581     @ivar ghost: whether this is a known node or not (config)
1582     @type os_fail: boolean
1583     @ivar os_fail: whether the RPC call didn't return valid OS data
1584     @type oslist: list
1585     @ivar oslist: list of OSes as diagnosed by DiagnoseOS
1586     @type vm_capable: boolean
1587     @ivar vm_capable: whether the node can host instances
1588
1589     """
1590     def __init__(self, offline=False, name=None, vm_capable=True):
1591       self.name = name
1592       self.volumes = {}
1593       self.instances = []
1594       self.pinst = []
1595       self.sinst = []
1596       self.sbp = {}
1597       self.mfree = 0
1598       self.dfree = 0
1599       self.offline = offline
1600       self.vm_capable = vm_capable
1601       self.rpc_fail = False
1602       self.lvm_fail = False
1603       self.hyp_fail = False
1604       self.ghost = False
1605       self.os_fail = False
1606       self.oslist = {}
1607
1608   def ExpandNames(self):
1609     # This raises errors.OpPrereqError on its own:
1610     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
1611
1612     # Get instances in node group; this is unsafe and needs verification later
1613     inst_names = self.cfg.GetNodeGroupInstances(self.group_uuid)
1614
1615     self.needed_locks = {
1616       locking.LEVEL_INSTANCE: inst_names,
1617       locking.LEVEL_NODEGROUP: [self.group_uuid],
1618       locking.LEVEL_NODE: [],
1619       }
1620
1621     self.share_locks = _ShareAll()
1622
1623   def DeclareLocks(self, level):
1624     if level == locking.LEVEL_NODE:
1625       # Get members of node group; this is unsafe and needs verification later
1626       nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1627
1628       all_inst_info = self.cfg.GetAllInstancesInfo()
1629
1630       # In Exec(), we warn about mirrored instances that have primary and
1631       # secondary living in separate node groups. To fully verify that
1632       # volumes for these instances are healthy, we will need to do an
1633       # extra call to their secondaries. We ensure here those nodes will
1634       # be locked.
1635       for inst in self.glm.list_owned(locking.LEVEL_INSTANCE):
1636         # Important: access only the instances whose lock is owned
1637         if all_inst_info[inst].disk_template in constants.DTS_INT_MIRROR:
1638           nodes.update(all_inst_info[inst].secondary_nodes)
1639
1640       self.needed_locks[locking.LEVEL_NODE] = nodes
1641
1642   def CheckPrereq(self):
1643     group_nodes = set(self.cfg.GetNodeGroup(self.group_uuid).members)
1644     group_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
1645
1646     unlocked_nodes = \
1647         group_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))
1648
1649     unlocked_instances = \
1650         group_instances.difference(self.glm.list_owned(locking.LEVEL_INSTANCE))
1651
1652     if unlocked_nodes:
1653       raise errors.OpPrereqError("Missing lock for nodes: %s" %
1654                                  utils.CommaJoin(unlocked_nodes))
1655
1656     if unlocked_instances:
1657       raise errors.OpPrereqError("Missing lock for instances: %s" %
1658                                  utils.CommaJoin(unlocked_instances))
1659
1660     self.all_node_info = self.cfg.GetAllNodesInfo()
1661     self.all_inst_info = self.cfg.GetAllInstancesInfo()
1662
1663     self.my_node_names = utils.NiceSort(group_nodes)
1664     self.my_inst_names = utils.NiceSort(group_instances)
1665
1666     self.my_node_info = dict((name, self.all_node_info[name])
1667                              for name in self.my_node_names)
1668
1669     self.my_inst_info = dict((name, self.all_inst_info[name])
1670                              for name in self.my_inst_names)
1671
1672     # We detect here the nodes that will need the extra RPC calls for verifying
1673     # split LV volumes; they should be locked.
1674     extra_lv_nodes = set()
1675
1676     for inst in self.my_inst_info.values():
1677       if inst.disk_template in constants.DTS_INT_MIRROR:
1678         group = self.my_node_info[inst.primary_node].group
1679         for nname in inst.secondary_nodes:
1680           if self.all_node_info[nname].group != group:
1681             extra_lv_nodes.add(nname)
1682
1683     unlocked_lv_nodes = \
1684         extra_lv_nodes.difference(self.glm.list_owned(locking.LEVEL_NODE))
1685
1686     if unlocked_lv_nodes:
1687       raise errors.OpPrereqError("these nodes could be locked: %s" %
1688                                  utils.CommaJoin(unlocked_lv_nodes))
1689     self.extra_lv_nodes = list(extra_lv_nodes)
1690
1691   def _VerifyNode(self, ninfo, nresult):
1692     """Perform some basic validation on data returned from a node.
1693
1694       - check the result data structure is well formed and has all the
1695         mandatory fields
1696       - check ganeti version
1697
1698     @type ninfo: L{objects.Node}
1699     @param ninfo: the node to check
1700     @param nresult: the results from the node
1701     @rtype: boolean
1702     @return: whether overall this call was successful (and we can expect
1703          reasonable values in the respose)
1704
1705     """
1706     node = ninfo.name
1707     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1708
1709     # main result, nresult should be a non-empty dict
1710     test = not nresult or not isinstance(nresult, dict)
1711     _ErrorIf(test, self.ENODERPC, node,
1712                   "unable to verify node: no data returned")
1713     if test:
1714       return False
1715
1716     # compares ganeti version
1717     local_version = constants.PROTOCOL_VERSION
1718     remote_version = nresult.get("version", None)
1719     test = not (remote_version and
1720                 isinstance(remote_version, (list, tuple)) and
1721                 len(remote_version) == 2)
1722     _ErrorIf(test, self.ENODERPC, node,
1723              "connection to node returned invalid data")
1724     if test:
1725       return False
1726
1727     test = local_version != remote_version[0]
1728     _ErrorIf(test, self.ENODEVERSION, node,
1729              "incompatible protocol versions: master %s,"
1730              " node %s", local_version, remote_version[0])
1731     if test:
1732       return False
1733
1734     # node seems compatible, we can actually try to look into its results
1735
1736     # full package version
1737     self._ErrorIf(constants.RELEASE_VERSION != remote_version[1],
1738                   self.ENODEVERSION, node,
1739                   "software version mismatch: master %s, node %s",
1740                   constants.RELEASE_VERSION, remote_version[1],
1741                   code=self.ETYPE_WARNING)
1742
1743     hyp_result = nresult.get(constants.NV_HYPERVISOR, None)
1744     if ninfo.vm_capable and isinstance(hyp_result, dict):
1745       for hv_name, hv_result in hyp_result.iteritems():
1746         test = hv_result is not None
1747         _ErrorIf(test, self.ENODEHV, node,
1748                  "hypervisor %s verify failure: '%s'", hv_name, hv_result)
1749
1750     hvp_result = nresult.get(constants.NV_HVPARAMS, None)
1751     if ninfo.vm_capable and isinstance(hvp_result, list):
1752       for item, hv_name, hv_result in hvp_result:
1753         _ErrorIf(True, self.ENODEHV, node,
1754                  "hypervisor %s parameter verify failure (source %s): %s",
1755                  hv_name, item, hv_result)
1756
1757     test = nresult.get(constants.NV_NODESETUP,
1758                        ["Missing NODESETUP results"])
1759     _ErrorIf(test, self.ENODESETUP, node, "node setup error: %s",
1760              "; ".join(test))
1761
1762     return True
1763
1764   def _VerifyNodeTime(self, ninfo, nresult,
1765                       nvinfo_starttime, nvinfo_endtime):
1766     """Check the node time.
1767
1768     @type ninfo: L{objects.Node}
1769     @param ninfo: the node to check
1770     @param nresult: the remote results for the node
1771     @param nvinfo_starttime: the start time of the RPC call
1772     @param nvinfo_endtime: the end time of the RPC call
1773
1774     """
1775     node = ninfo.name
1776     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1777
1778     ntime = nresult.get(constants.NV_TIME, None)
1779     try:
1780       ntime_merged = utils.MergeTime(ntime)
1781     except (ValueError, TypeError):
1782       _ErrorIf(True, self.ENODETIME, node, "Node returned invalid time")
1783       return
1784
1785     if ntime_merged < (nvinfo_starttime - constants.NODE_MAX_CLOCK_SKEW):
1786       ntime_diff = "%.01fs" % abs(nvinfo_starttime - ntime_merged)
1787     elif ntime_merged > (nvinfo_endtime + constants.NODE_MAX_CLOCK_SKEW):
1788       ntime_diff = "%.01fs" % abs(ntime_merged - nvinfo_endtime)
1789     else:
1790       ntime_diff = None
1791
1792     _ErrorIf(ntime_diff is not None, self.ENODETIME, node,
1793              "Node time diverges by at least %s from master node time",
1794              ntime_diff)
1795
1796   def _VerifyNodeLVM(self, ninfo, nresult, vg_name):
1797     """Check the node LVM results.
1798
1799     @type ninfo: L{objects.Node}
1800     @param ninfo: the node to check
1801     @param nresult: the remote results for the node
1802     @param vg_name: the configured VG name
1803
1804     """
1805     if vg_name is None:
1806       return
1807
1808     node = ninfo.name
1809     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1810
1811     # checks vg existence and size > 20G
1812     vglist = nresult.get(constants.NV_VGLIST, None)
1813     test = not vglist
1814     _ErrorIf(test, self.ENODELVM, node, "unable to check volume groups")
1815     if not test:
1816       vgstatus = utils.CheckVolumeGroupSize(vglist, vg_name,
1817                                             constants.MIN_VG_SIZE)
1818       _ErrorIf(vgstatus, self.ENODELVM, node, vgstatus)
1819
1820     # check pv names
1821     pvlist = nresult.get(constants.NV_PVLIST, None)
1822     test = pvlist is None
1823     _ErrorIf(test, self.ENODELVM, node, "Can't get PV list from node")
1824     if not test:
1825       # check that ':' is not present in PV names, since it's a
1826       # special character for lvcreate (denotes the range of PEs to
1827       # use on the PV)
1828       for _, pvname, owner_vg in pvlist:
1829         test = ":" in pvname
1830         _ErrorIf(test, self.ENODELVM, node, "Invalid character ':' in PV"
1831                  " '%s' of VG '%s'", pvname, owner_vg)
1832
1833   def _VerifyNodeBridges(self, ninfo, nresult, bridges):
1834     """Check the node bridges.
1835
1836     @type ninfo: L{objects.Node}
1837     @param ninfo: the node to check
1838     @param nresult: the remote results for the node
1839     @param bridges: the expected list of bridges
1840
1841     """
1842     if not bridges:
1843       return
1844
1845     node = ninfo.name
1846     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1847
1848     missing = nresult.get(constants.NV_BRIDGES, None)
1849     test = not isinstance(missing, list)
1850     _ErrorIf(test, self.ENODENET, node,
1851              "did not return valid bridge information")
1852     if not test:
1853       _ErrorIf(bool(missing), self.ENODENET, node, "missing bridges: %s" %
1854                utils.CommaJoin(sorted(missing)))
1855
1856   def _VerifyNodeNetwork(self, ninfo, nresult):
1857     """Check the node network connectivity results.
1858
1859     @type ninfo: L{objects.Node}
1860     @param ninfo: the node to check
1861     @param nresult: the remote results for the node
1862
1863     """
1864     node = ninfo.name
1865     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1866
1867     test = constants.NV_NODELIST not in nresult
1868     _ErrorIf(test, self.ENODESSH, node,
1869              "node hasn't returned node ssh connectivity data")
1870     if not test:
1871       if nresult[constants.NV_NODELIST]:
1872         for a_node, a_msg in nresult[constants.NV_NODELIST].items():
1873           _ErrorIf(True, self.ENODESSH, node,
1874                    "ssh communication with node '%s': %s", a_node, a_msg)
1875
1876     test = constants.NV_NODENETTEST not in nresult
1877     _ErrorIf(test, self.ENODENET, node,
1878              "node hasn't returned node tcp connectivity data")
1879     if not test:
1880       if nresult[constants.NV_NODENETTEST]:
1881         nlist = utils.NiceSort(nresult[constants.NV_NODENETTEST].keys())
1882         for anode in nlist:
1883           _ErrorIf(True, self.ENODENET, node,
1884                    "tcp communication with node '%s': %s",
1885                    anode, nresult[constants.NV_NODENETTEST][anode])
1886
1887     test = constants.NV_MASTERIP not in nresult
1888     _ErrorIf(test, self.ENODENET, node,
1889              "node hasn't returned node master IP reachability data")
1890     if not test:
1891       if not nresult[constants.NV_MASTERIP]:
1892         if node == self.master_node:
1893           msg = "the master node cannot reach the master IP (not configured?)"
1894         else:
1895           msg = "cannot reach the master IP"
1896         _ErrorIf(True, self.ENODENET, node, msg)
1897
1898   def _VerifyInstance(self, instance, instanceconfig, node_image,
1899                       diskstatus):
1900     """Verify an instance.
1901
1902     This function checks to see if the required block devices are
1903     available on the instance's node.
1904
1905     """
1906     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
1907     node_current = instanceconfig.primary_node
1908
1909     node_vol_should = {}
1910     instanceconfig.MapLVsByNode(node_vol_should)
1911
1912     for node in node_vol_should:
1913       n_img = node_image[node]
1914       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1915         # ignore missing volumes on offline or broken nodes
1916         continue
1917       for volume in node_vol_should[node]:
1918         test = volume not in n_img.volumes
1919         _ErrorIf(test, self.EINSTANCEMISSINGDISK, instance,
1920                  "volume %s missing on node %s", volume, node)
1921
1922     if instanceconfig.admin_up:
1923       pri_img = node_image[node_current]
1924       test = instance not in pri_img.instances and not pri_img.offline
1925       _ErrorIf(test, self.EINSTANCEDOWN, instance,
1926                "instance not running on its primary node %s",
1927                node_current)
1928
1929     diskdata = [(nname, success, status, idx)
1930                 for (nname, disks) in diskstatus.items()
1931                 for idx, (success, status) in enumerate(disks)]
1932
1933     for nname, success, bdev_status, idx in diskdata:
1934       # the 'ghost node' construction in Exec() ensures that we have a
1935       # node here
1936       snode = node_image[nname]
1937       bad_snode = snode.ghost or snode.offline
1938       _ErrorIf(instanceconfig.admin_up and not success and not bad_snode,
1939                self.EINSTANCEFAULTYDISK, instance,
1940                "couldn't retrieve status for disk/%s on %s: %s",
1941                idx, nname, bdev_status)
1942       _ErrorIf((instanceconfig.admin_up and success and
1943                 bdev_status.ldisk_status == constants.LDS_FAULTY),
1944                self.EINSTANCEFAULTYDISK, instance,
1945                "disk/%s on %s is faulty", idx, nname)
1946
1947   def _VerifyOrphanVolumes(self, node_vol_should, node_image, reserved):
1948     """Verify if there are any unknown volumes in the cluster.
1949
1950     The .os, .swap and backup volumes are ignored. All other volumes are
1951     reported as unknown.
1952
1953     @type reserved: L{ganeti.utils.FieldSet}
1954     @param reserved: a FieldSet of reserved volume names
1955
1956     """
1957     for node, n_img in node_image.items():
1958       if n_img.offline or n_img.rpc_fail or n_img.lvm_fail:
1959         # skip non-healthy nodes
1960         continue
1961       for volume in n_img.volumes:
1962         test = ((node not in node_vol_should or
1963                 volume not in node_vol_should[node]) and
1964                 not reserved.Matches(volume))
1965         self._ErrorIf(test, self.ENODEORPHANLV, node,
1966                       "volume %s is unknown", volume)
1967
1968   def _VerifyNPlusOneMemory(self, node_image, instance_cfg):
1969     """Verify N+1 Memory Resilience.
1970
1971     Check that if one single node dies we can still start all the
1972     instances it was primary for.
1973
1974     """
1975     cluster_info = self.cfg.GetClusterInfo()
1976     for node, n_img in node_image.items():
1977       # This code checks that every node which is now listed as
1978       # secondary has enough memory to host all instances it is
1979       # supposed to should a single other node in the cluster fail.
1980       # FIXME: not ready for failover to an arbitrary node
1981       # FIXME: does not support file-backed instances
1982       # WARNING: we currently take into account down instances as well
1983       # as up ones, considering that even if they're down someone
1984       # might want to start them even in the event of a node failure.
1985       if n_img.offline:
1986         # we're skipping offline nodes from the N+1 warning, since
1987         # most likely we don't have good memory infromation from them;
1988         # we already list instances living on such nodes, and that's
1989         # enough warning
1990         continue
1991       for prinode, instances in n_img.sbp.items():
1992         needed_mem = 0
1993         for instance in instances:
1994           bep = cluster_info.FillBE(instance_cfg[instance])
1995           if bep[constants.BE_AUTO_BALANCE]:
1996             needed_mem += bep[constants.BE_MEMORY]
1997         test = n_img.mfree < needed_mem
1998         self._ErrorIf(test, self.ENODEN1, node,
1999                       "not enough memory to accomodate instance failovers"
2000                       " should node %s fail (%dMiB needed, %dMiB available)",
2001                       prinode, needed_mem, n_img.mfree)
2002
2003   @classmethod
2004   def _VerifyFiles(cls, errorif, nodeinfo, master_node, all_nvinfo,
2005                    (files_all, files_all_opt, files_mc, files_vm)):
2006     """Verifies file checksums collected from all nodes.
2007
2008     @param errorif: Callback for reporting errors
2009     @param nodeinfo: List of L{objects.Node} objects
2010     @param master_node: Name of master node
2011     @param all_nvinfo: RPC results
2012
2013     """
2014     node_names = frozenset(node.name for node in nodeinfo if not node.offline)
2015
2016     assert master_node in node_names
2017     assert (len(files_all | files_all_opt | files_mc | files_vm) ==
2018             sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
2019            "Found file listed in more than one file list"
2020
2021     # Define functions determining which nodes to consider for a file
2022     file2nodefn = dict([(filename, fn)
2023       for (files, fn) in [(files_all, None),
2024                           (files_all_opt, None),
2025                           (files_mc, lambda node: (node.master_candidate or
2026                                                    node.name == master_node)),
2027                           (files_vm, lambda node: node.vm_capable)]
2028       for filename in files])
2029
2030     fileinfo = dict((filename, {}) for filename in file2nodefn.keys())
2031
2032     for node in nodeinfo:
2033       if node.offline:
2034         continue
2035
2036       nresult = all_nvinfo[node.name]
2037
2038       if nresult.fail_msg or not nresult.payload:
2039         node_files = None
2040       else:
2041         node_files = nresult.payload.get(constants.NV_FILELIST, None)
2042
2043       test = not (node_files and isinstance(node_files, dict))
2044       errorif(test, cls.ENODEFILECHECK, node.name,
2045               "Node did not return file checksum data")
2046       if test:
2047         continue
2048
2049       for (filename, checksum) in node_files.items():
2050         # Check if the file should be considered for a node
2051         fn = file2nodefn[filename]
2052         if fn is None or fn(node):
2053           fileinfo[filename].setdefault(checksum, set()).add(node.name)
2054
2055     for (filename, checksums) in fileinfo.items():
2056       assert compat.all(len(i) > 10 for i in checksums), "Invalid checksum"
2057
2058       # Nodes having the file
2059       with_file = frozenset(node_name
2060                             for nodes in fileinfo[filename].values()
2061                             for node_name in nodes)
2062
2063       # Nodes missing file
2064       missing_file = node_names - with_file
2065
2066       if filename in files_all_opt:
2067         # All or no nodes
2068         errorif(missing_file and missing_file != node_names,
2069                 cls.ECLUSTERFILECHECK, None,
2070                 "File %s is optional, but it must exist on all or no"
2071                 " nodes (not found on %s)",
2072                 filename, utils.CommaJoin(utils.NiceSort(missing_file)))
2073       else:
2074         errorif(missing_file, cls.ECLUSTERFILECHECK, None,
2075                 "File %s is missing from node(s) %s", filename,
2076                 utils.CommaJoin(utils.NiceSort(missing_file)))
2077
2078       # See if there are multiple versions of the file
2079       test = len(checksums) > 1
2080       if test:
2081         variants = ["variant %s on %s" %
2082                     (idx + 1, utils.CommaJoin(utils.NiceSort(nodes)))
2083                     for (idx, (checksum, nodes)) in
2084                       enumerate(sorted(checksums.items()))]
2085       else:
2086         variants = []
2087
2088       errorif(test, cls.ECLUSTERFILECHECK, None,
2089               "File %s found with %s different checksums (%s)",
2090               filename, len(checksums), "; ".join(variants))
2091
2092   def _VerifyNodeDrbd(self, ninfo, nresult, instanceinfo, drbd_helper,
2093                       drbd_map):
2094     """Verifies and the node DRBD status.
2095
2096     @type ninfo: L{objects.Node}
2097     @param ninfo: the node to check
2098     @param nresult: the remote results for the node
2099     @param instanceinfo: the dict of instances
2100     @param drbd_helper: the configured DRBD usermode helper
2101     @param drbd_map: the DRBD map as returned by
2102         L{ganeti.config.ConfigWriter.ComputeDRBDMap}
2103
2104     """
2105     node = ninfo.name
2106     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2107
2108     if drbd_helper:
2109       helper_result = nresult.get(constants.NV_DRBDHELPER, None)
2110       test = (helper_result == None)
2111       _ErrorIf(test, self.ENODEDRBDHELPER, node,
2112                "no drbd usermode helper returned")
2113       if helper_result:
2114         status, payload = helper_result
2115         test = not status
2116         _ErrorIf(test, self.ENODEDRBDHELPER, node,
2117                  "drbd usermode helper check unsuccessful: %s", payload)
2118         test = status and (payload != drbd_helper)
2119         _ErrorIf(test, self.ENODEDRBDHELPER, node,
2120                  "wrong drbd usermode helper: %s", payload)
2121
2122     # compute the DRBD minors
2123     node_drbd = {}
2124     for minor, instance in drbd_map[node].items():
2125       test = instance not in instanceinfo
2126       _ErrorIf(test, self.ECLUSTERCFG, None,
2127                "ghost instance '%s' in temporary DRBD map", instance)
2128         # ghost instance should not be running, but otherwise we
2129         # don't give double warnings (both ghost instance and
2130         # unallocated minor in use)
2131       if test:
2132         node_drbd[minor] = (instance, False)
2133       else:
2134         instance = instanceinfo[instance]
2135         node_drbd[minor] = (instance.name, instance.admin_up)
2136
2137     # and now check them
2138     used_minors = nresult.get(constants.NV_DRBDLIST, [])
2139     test = not isinstance(used_minors, (tuple, list))
2140     _ErrorIf(test, self.ENODEDRBD, node,
2141              "cannot parse drbd status file: %s", str(used_minors))
2142     if test:
2143       # we cannot check drbd status
2144       return
2145
2146     for minor, (iname, must_exist) in node_drbd.items():
2147       test = minor not in used_minors and must_exist
2148       _ErrorIf(test, self.ENODEDRBD, node,
2149                "drbd minor %d of instance %s is not active", minor, iname)
2150     for minor in used_minors:
2151       test = minor not in node_drbd
2152       _ErrorIf(test, self.ENODEDRBD, node,
2153                "unallocated drbd minor %d is in use", minor)
2154
2155   def _UpdateNodeOS(self, ninfo, nresult, nimg):
2156     """Builds the node OS structures.
2157
2158     @type ninfo: L{objects.Node}
2159     @param ninfo: the node to check
2160     @param nresult: the remote results for the node
2161     @param nimg: the node image object
2162
2163     """
2164     node = ninfo.name
2165     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2166
2167     remote_os = nresult.get(constants.NV_OSLIST, None)
2168     test = (not isinstance(remote_os, list) or
2169             not compat.all(isinstance(v, list) and len(v) == 7
2170                            for v in remote_os))
2171
2172     _ErrorIf(test, self.ENODEOS, node,
2173              "node hasn't returned valid OS data")
2174
2175     nimg.os_fail = test
2176
2177     if test:
2178       return
2179
2180     os_dict = {}
2181
2182     for (name, os_path, status, diagnose,
2183          variants, parameters, api_ver) in nresult[constants.NV_OSLIST]:
2184
2185       if name not in os_dict:
2186         os_dict[name] = []
2187
2188       # parameters is a list of lists instead of list of tuples due to
2189       # JSON lacking a real tuple type, fix it:
2190       parameters = [tuple(v) for v in parameters]
2191       os_dict[name].append((os_path, status, diagnose,
2192                             set(variants), set(parameters), set(api_ver)))
2193
2194     nimg.oslist = os_dict
2195
2196   def _VerifyNodeOS(self, ninfo, nimg, base):
2197     """Verifies the node OS list.
2198
2199     @type ninfo: L{objects.Node}
2200     @param ninfo: the node to check
2201     @param nimg: the node image object
2202     @param base: the 'template' node we match against (e.g. from the master)
2203
2204     """
2205     node = ninfo.name
2206     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2207
2208     assert not nimg.os_fail, "Entered _VerifyNodeOS with failed OS rpc?"
2209
2210     beautify_params = lambda l: ["%s: %s" % (k, v) for (k, v) in l]
2211     for os_name, os_data in nimg.oslist.items():
2212       assert os_data, "Empty OS status for OS %s?!" % os_name
2213       f_path, f_status, f_diag, f_var, f_param, f_api = os_data[0]
2214       _ErrorIf(not f_status, self.ENODEOS, node,
2215                "Invalid OS %s (located at %s): %s", os_name, f_path, f_diag)
2216       _ErrorIf(len(os_data) > 1, self.ENODEOS, node,
2217                "OS '%s' has multiple entries (first one shadows the rest): %s",
2218                os_name, utils.CommaJoin([v[0] for v in os_data]))
2219       # comparisons with the 'base' image
2220       test = os_name not in base.oslist
2221       _ErrorIf(test, self.ENODEOS, node,
2222                "Extra OS %s not present on reference node (%s)",
2223                os_name, base.name)
2224       if test:
2225         continue
2226       assert base.oslist[os_name], "Base node has empty OS status?"
2227       _, b_status, _, b_var, b_param, b_api = base.oslist[os_name][0]
2228       if not b_status:
2229         # base OS is invalid, skipping
2230         continue
2231       for kind, a, b in [("API version", f_api, b_api),
2232                          ("variants list", f_var, b_var),
2233                          ("parameters", beautify_params(f_param),
2234                           beautify_params(b_param))]:
2235         _ErrorIf(a != b, self.ENODEOS, node,
2236                  "OS %s for %s differs from reference node %s: [%s] vs. [%s]",
2237                  kind, os_name, base.name,
2238                  utils.CommaJoin(sorted(a)), utils.CommaJoin(sorted(b)))
2239
2240     # check any missing OSes
2241     missing = set(base.oslist.keys()).difference(nimg.oslist.keys())
2242     _ErrorIf(missing, self.ENODEOS, node,
2243              "OSes present on reference node %s but missing on this node: %s",
2244              base.name, utils.CommaJoin(missing))
2245
2246   def _VerifyOob(self, ninfo, nresult):
2247     """Verifies out of band functionality of a node.
2248
2249     @type ninfo: L{objects.Node}
2250     @param ninfo: the node to check
2251     @param nresult: the remote results for the node
2252
2253     """
2254     node = ninfo.name
2255     # We just have to verify the paths on master and/or master candidates
2256     # as the oob helper is invoked on the master
2257     if ((ninfo.master_candidate or ninfo.master_capable) and
2258         constants.NV_OOB_PATHS in nresult):
2259       for path_result in nresult[constants.NV_OOB_PATHS]:
2260         self._ErrorIf(path_result, self.ENODEOOBPATH, node, path_result)
2261
2262   def _UpdateNodeVolumes(self, ninfo, nresult, nimg, vg_name):
2263     """Verifies and updates the node volume data.
2264
2265     This function will update a L{NodeImage}'s internal structures
2266     with data from the remote call.
2267
2268     @type ninfo: L{objects.Node}
2269     @param ninfo: the node to check
2270     @param nresult: the remote results for the node
2271     @param nimg: the node image object
2272     @param vg_name: the configured VG name
2273
2274     """
2275     node = ninfo.name
2276     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2277
2278     nimg.lvm_fail = True
2279     lvdata = nresult.get(constants.NV_LVLIST, "Missing LV data")
2280     if vg_name is None:
2281       pass
2282     elif isinstance(lvdata, basestring):
2283       _ErrorIf(True, self.ENODELVM, node, "LVM problem on node: %s",
2284                utils.SafeEncode(lvdata))
2285     elif not isinstance(lvdata, dict):
2286       _ErrorIf(True, self.ENODELVM, node, "rpc call to node failed (lvlist)")
2287     else:
2288       nimg.volumes = lvdata
2289       nimg.lvm_fail = False
2290
2291   def _UpdateNodeInstances(self, ninfo, nresult, nimg):
2292     """Verifies and updates the node instance list.
2293
2294     If the listing was successful, then updates this node's instance
2295     list. Otherwise, it marks the RPC call as failed for the instance
2296     list key.
2297
2298     @type ninfo: L{objects.Node}
2299     @param ninfo: the node to check
2300     @param nresult: the remote results for the node
2301     @param nimg: the node image object
2302
2303     """
2304     idata = nresult.get(constants.NV_INSTANCELIST, None)
2305     test = not isinstance(idata, list)
2306     self._ErrorIf(test, self.ENODEHV, ninfo.name, "rpc call to node failed"
2307                   " (instancelist): %s", utils.SafeEncode(str(idata)))
2308     if test:
2309       nimg.hyp_fail = True
2310     else:
2311       nimg.instances = idata
2312
2313   def _UpdateNodeInfo(self, ninfo, nresult, nimg, vg_name):
2314     """Verifies and computes a node information map
2315
2316     @type ninfo: L{objects.Node}
2317     @param ninfo: the node to check
2318     @param nresult: the remote results for the node
2319     @param nimg: the node image object
2320     @param vg_name: the configured VG name
2321
2322     """
2323     node = ninfo.name
2324     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2325
2326     # try to read free memory (from the hypervisor)
2327     hv_info = nresult.get(constants.NV_HVINFO, None)
2328     test = not isinstance(hv_info, dict) or "memory_free" not in hv_info
2329     _ErrorIf(test, self.ENODEHV, node, "rpc call to node failed (hvinfo)")
2330     if not test:
2331       try:
2332         nimg.mfree = int(hv_info["memory_free"])
2333       except (ValueError, TypeError):
2334         _ErrorIf(True, self.ENODERPC, node,
2335                  "node returned invalid nodeinfo, check hypervisor")
2336
2337     # FIXME: devise a free space model for file based instances as well
2338     if vg_name is not None:
2339       test = (constants.NV_VGLIST not in nresult or
2340               vg_name not in nresult[constants.NV_VGLIST])
2341       _ErrorIf(test, self.ENODELVM, node,
2342                "node didn't return data for the volume group '%s'"
2343                " - it is either missing or broken", vg_name)
2344       if not test:
2345         try:
2346           nimg.dfree = int(nresult[constants.NV_VGLIST][vg_name])
2347         except (ValueError, TypeError):
2348           _ErrorIf(True, self.ENODERPC, node,
2349                    "node returned invalid LVM info, check LVM status")
2350
2351   def _CollectDiskInfo(self, nodelist, node_image, instanceinfo):
2352     """Gets per-disk status information for all instances.
2353
2354     @type nodelist: list of strings
2355     @param nodelist: Node names
2356     @type node_image: dict of (name, L{objects.Node})
2357     @param node_image: Node objects
2358     @type instanceinfo: dict of (name, L{objects.Instance})
2359     @param instanceinfo: Instance objects
2360     @rtype: {instance: {node: [(succes, payload)]}}
2361     @return: a dictionary of per-instance dictionaries with nodes as
2362         keys and disk information as values; the disk information is a
2363         list of tuples (success, payload)
2364
2365     """
2366     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2367
2368     node_disks = {}
2369     node_disks_devonly = {}
2370     diskless_instances = set()
2371     diskless = constants.DT_DISKLESS
2372
2373     for nname in nodelist:
2374       node_instances = list(itertools.chain(node_image[nname].pinst,
2375                                             node_image[nname].sinst))
2376       diskless_instances.update(inst for inst in node_instances
2377                                 if instanceinfo[inst].disk_template == diskless)
2378       disks = [(inst, disk)
2379                for inst in node_instances
2380                for disk in instanceinfo[inst].disks]
2381
2382       if not disks:
2383         # No need to collect data
2384         continue
2385
2386       node_disks[nname] = disks
2387
2388       # Creating copies as SetDiskID below will modify the objects and that can
2389       # lead to incorrect data returned from nodes
2390       devonly = [dev.Copy() for (_, dev) in disks]
2391
2392       for dev in devonly:
2393         self.cfg.SetDiskID(dev, nname)
2394
2395       node_disks_devonly[nname] = devonly
2396
2397     assert len(node_disks) == len(node_disks_devonly)
2398
2399     # Collect data from all nodes with disks
2400     result = self.rpc.call_blockdev_getmirrorstatus_multi(node_disks.keys(),
2401                                                           node_disks_devonly)
2402
2403     assert len(result) == len(node_disks)
2404
2405     instdisk = {}
2406
2407     for (nname, nres) in result.items():
2408       disks = node_disks[nname]
2409
2410       if nres.offline:
2411         # No data from this node
2412         data = len(disks) * [(False, "node offline")]
2413       else:
2414         msg = nres.fail_msg
2415         _ErrorIf(msg, self.ENODERPC, nname,
2416                  "while getting disk information: %s", msg)
2417         if msg:
2418           # No data from this node
2419           data = len(disks) * [(False, msg)]
2420         else:
2421           data = []
2422           for idx, i in enumerate(nres.payload):
2423             if isinstance(i, (tuple, list)) and len(i) == 2:
2424               data.append(i)
2425             else:
2426               logging.warning("Invalid result from node %s, entry %d: %s",
2427                               nname, idx, i)
2428               data.append((False, "Invalid result from the remote node"))
2429
2430       for ((inst, _), status) in zip(disks, data):
2431         instdisk.setdefault(inst, {}).setdefault(nname, []).append(status)
2432
2433     # Add empty entries for diskless instances.
2434     for inst in diskless_instances:
2435       assert inst not in instdisk
2436       instdisk[inst] = {}
2437
2438     assert compat.all(len(statuses) == len(instanceinfo[inst].disks) and
2439                       len(nnames) <= len(instanceinfo[inst].all_nodes) and
2440                       compat.all(isinstance(s, (tuple, list)) and
2441                                  len(s) == 2 for s in statuses)
2442                       for inst, nnames in instdisk.items()
2443                       for nname, statuses in nnames.items())
2444     assert set(instdisk) == set(instanceinfo), "instdisk consistency failure"
2445
2446     return instdisk
2447
2448   def BuildHooksEnv(self):
2449     """Build hooks env.
2450
2451     Cluster-Verify hooks just ran in the post phase and their failure makes
2452     the output be logged in the verify output and the verification to fail.
2453
2454     """
2455     env = {
2456       "CLUSTER_TAGS": " ".join(self.cfg.GetClusterInfo().GetTags())
2457       }
2458
2459     env.update(("NODE_TAGS_%s" % node.name, " ".join(node.GetTags()))
2460                for node in self.my_node_info.values())
2461
2462     return env
2463
2464   def BuildHooksNodes(self):
2465     """Build hooks nodes.
2466
2467     """
2468     return ([], self.my_node_names)
2469
2470   def Exec(self, feedback_fn):
2471     """Verify integrity of the node group, performing various test on nodes.
2472
2473     """
2474     # This method has too many local variables. pylint: disable-msg=R0914
2475
2476     if not self.my_node_names:
2477       # empty node group
2478       feedback_fn("* Empty node group, skipping verification")
2479       return True
2480
2481     self.bad = False
2482     _ErrorIf = self._ErrorIf # pylint: disable-msg=C0103
2483     verbose = self.op.verbose
2484     self._feedback_fn = feedback_fn
2485
2486     vg_name = self.cfg.GetVGName()
2487     drbd_helper = self.cfg.GetDRBDHelper()
2488     cluster = self.cfg.GetClusterInfo()
2489     groupinfo = self.cfg.GetAllNodeGroupsInfo()
2490     hypervisors = cluster.enabled_hypervisors
2491     node_data_list = [self.my_node_info[name] for name in self.my_node_names]
2492
2493     i_non_redundant = [] # Non redundant instances
2494     i_non_a_balanced = [] # Non auto-balanced instances
2495     n_offline = 0 # Count of offline nodes
2496     n_drained = 0 # Count of nodes being drained
2497     node_vol_should = {}
2498
2499     # FIXME: verify OS list
2500
2501     # File verification
2502     filemap = _ComputeAncillaryFiles(cluster, False)
2503
2504     # do local checksums
2505     master_node = self.master_node = self.cfg.GetMasterNode()
2506     master_ip = self.cfg.GetMasterIP()
2507
2508     feedback_fn("* Gathering data (%d nodes)" % len(self.my_node_names))
2509
2510     # We will make nodes contact all nodes in their group, and one node from
2511     # every other group.
2512     # TODO: should it be a *random* node, different every time?
2513     online_nodes = [node.name for node in node_data_list if not node.offline]
2514     other_group_nodes = {}
2515
2516     for name in sorted(self.all_node_info):
2517       node = self.all_node_info[name]
2518       if (node.group not in other_group_nodes
2519           and node.group != self.group_uuid
2520           and not node.offline):
2521         other_group_nodes[node.group] = node.name
2522
2523     node_verify_param = {
2524       constants.NV_FILELIST:
2525         utils.UniqueSequence(filename
2526                              for files in filemap
2527                              for filename in files),
2528       constants.NV_NODELIST: online_nodes + other_group_nodes.values(),
2529       constants.NV_HYPERVISOR: hypervisors,
2530       constants.NV_HVPARAMS:
2531         _GetAllHypervisorParameters(cluster, self.all_inst_info.values()),
2532       constants.NV_NODENETTEST: [(node.name, node.primary_ip, node.secondary_ip)
2533                                  for node in node_data_list
2534                                  if not node.offline],
2535       constants.NV_INSTANCELIST: hypervisors,
2536       constants.NV_VERSION: None,
2537       constants.NV_HVINFO: self.cfg.GetHypervisorType(),
2538       constants.NV_NODESETUP: None,
2539       constants.NV_TIME: None,
2540       constants.NV_MASTERIP: (master_node, master_ip),
2541       constants.NV_OSLIST: None,
2542       constants.NV_VMNODES: self.cfg.GetNonVmCapableNodeList(),
2543       }
2544
2545     if vg_name is not None:
2546       node_verify_param[constants.NV_VGLIST] = None
2547       node_verify_param[constants.NV_LVLIST] = vg_name
2548       node_verify_param[constants.NV_PVLIST] = [vg_name]
2549       node_verify_param[constants.NV_DRBDLIST] = None
2550
2551     if drbd_helper:
2552       node_verify_param[constants.NV_DRBDHELPER] = drbd_helper
2553
2554     # bridge checks
2555     # FIXME: this needs to be changed per node-group, not cluster-wide
2556     bridges = set()
2557     default_nicpp = cluster.nicparams[constants.PP_DEFAULT]
2558     if default_nicpp[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2559       bridges.add(default_nicpp[constants.NIC_LINK])
2560     for instance in self.my_inst_info.values():
2561       for nic in instance.nics:
2562         full_nic = cluster.SimpleFillNIC(nic.nicparams)
2563         if full_nic[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
2564           bridges.add(full_nic[constants.NIC_LINK])
2565
2566     if bridges:
2567       node_verify_param[constants.NV_BRIDGES] = list(bridges)
2568
2569     # Build our expected cluster state
2570     node_image = dict((node.name, self.NodeImage(offline=node.offline,
2571                                                  name=node.name,
2572                                                  vm_capable=node.vm_capable))
2573                       for node in node_data_list)
2574
2575     # Gather OOB paths
2576     oob_paths = []
2577     for node in self.all_node_info.values():
2578       path = _SupportsOob(self.cfg, node)
2579       if path and path not in oob_paths:
2580         oob_paths.append(path)
2581
2582     if oob_paths:
2583       node_verify_param[constants.NV_OOB_PATHS] = oob_paths
2584
2585     for instance in self.my_inst_names:
2586       inst_config = self.my_inst_info[instance]
2587
2588       for nname in inst_config.all_nodes:
2589         if nname not in node_image:
2590           gnode = self.NodeImage(name=nname)
2591           gnode.ghost = (nname not in self.all_node_info)
2592           node_image[nname] = gnode
2593
2594       inst_config.MapLVsByNode(node_vol_should)
2595
2596       pnode = inst_config.primary_node
2597       node_image[pnode].pinst.append(instance)
2598
2599       for snode in inst_config.secondary_nodes:
2600         nimg = node_image[snode]
2601         nimg.sinst.append(instance)
2602         if pnode not in nimg.sbp:
2603           nimg.sbp[pnode] = []
2604         nimg.sbp[pnode].append(instance)
2605
2606     # At this point, we have the in-memory data structures complete,
2607     # except for the runtime information, which we'll gather next
2608
2609     # Due to the way our RPC system works, exact response times cannot be
2610     # guaranteed (e.g. a broken node could run into a timeout). By keeping the
2611     # time before and after executing the request, we can at least have a time
2612     # window.
2613     nvinfo_starttime = time.time()
2614     all_nvinfo = self.rpc.call_node_verify(self.my_node_names,
2615                                            node_verify_param,
2616                                            self.cfg.GetClusterName())
2617     nvinfo_endtime = time.time()
2618
2619     if self.extra_lv_nodes and vg_name is not None:
2620       extra_lv_nvinfo = \
2621           self.rpc.call_node_verify(self.extra_lv_nodes,
2622                                     {constants.NV_LVLIST: vg_name},
2623                                     self.cfg.GetClusterName())
2624     else:
2625       extra_lv_nvinfo = {}
2626
2627     all_drbd_map = self.cfg.ComputeDRBDMap()
2628
2629     feedback_fn("* Gathering disk information (%s nodes)" %
2630                 len(self.my_node_names))
2631     instdisk = self._CollectDiskInfo(self.my_node_names, node_image,
2632                                      self.my_inst_info)
2633
2634     feedback_fn("* Verifying configuration file consistency")
2635
2636     # If not all nodes are being checked, we need to make sure the master node
2637     # and a non-checked vm_capable node are in the list.
2638     absent_nodes = set(self.all_node_info).difference(self.my_node_info)
2639     if absent_nodes:
2640       vf_nvinfo = all_nvinfo.copy()
2641       vf_node_info = list(self.my_node_info.values())
2642       additional_nodes = []
2643       if master_node not in self.my_node_info:
2644         additional_nodes.append(master_node)
2645         vf_node_info.append(self.all_node_info[master_node])
2646       # Add the first vm_capable node we find which is not included
2647       for node in absent_nodes:
2648         nodeinfo = self.all_node_info[node]
2649         if nodeinfo.vm_capable and not nodeinfo.offline:
2650           additional_nodes.append(node)
2651           vf_node_info.append(self.all_node_info[node])
2652           break
2653       key = constants.NV_FILELIST
2654       vf_nvinfo.update(self.rpc.call_node_verify(additional_nodes,
2655                                                  {key: node_verify_param[key]},
2656                                                  self.cfg.GetClusterName()))
2657     else:
2658       vf_nvinfo = all_nvinfo
2659       vf_node_info = self.my_node_info.values()
2660
2661     self._VerifyFiles(_ErrorIf, vf_node_info, master_node, vf_nvinfo, filemap)
2662
2663     feedback_fn("* Verifying node status")
2664
2665     refos_img = None
2666
2667     for node_i in node_data_list:
2668       node = node_i.name
2669       nimg = node_image[node]
2670
2671       if node_i.offline:
2672         if verbose:
2673           feedback_fn("* Skipping offline node %s" % (node,))
2674         n_offline += 1
2675         continue
2676
2677       if node == master_node:
2678         ntype = "master"
2679       elif node_i.master_candidate:
2680         ntype = "master candidate"
2681       elif node_i.drained:
2682         ntype = "drained"
2683         n_drained += 1
2684       else:
2685         ntype = "regular"
2686       if verbose:
2687         feedback_fn("* Verifying node %s (%s)" % (node, ntype))
2688
2689       msg = all_nvinfo[node].fail_msg
2690       _ErrorIf(msg, self.ENODERPC, node, "while contacting node: %s", msg)
2691       if msg:
2692         nimg.rpc_fail = True
2693         continue
2694
2695       nresult = all_nvinfo[node].payload
2696
2697       nimg.call_ok = self._VerifyNode(node_i, nresult)
2698       self._VerifyNodeTime(node_i, nresult, nvinfo_starttime, nvinfo_endtime)
2699       self._VerifyNodeNetwork(node_i, nresult)
2700       self._VerifyOob(node_i, nresult)
2701
2702       if nimg.vm_capable:
2703         self._VerifyNodeLVM(node_i, nresult, vg_name)
2704         self._VerifyNodeDrbd(node_i, nresult, self.all_inst_info, drbd_helper,
2705                              all_drbd_map)
2706
2707         self._UpdateNodeVolumes(node_i, nresult, nimg, vg_name)
2708         self._UpdateNodeInstances(node_i, nresult, nimg)
2709         self._UpdateNodeInfo(node_i, nresult, nimg, vg_name)
2710         self._UpdateNodeOS(node_i, nresult, nimg)
2711
2712         if not nimg.os_fail:
2713           if refos_img is None:
2714             refos_img = nimg
2715           self._VerifyNodeOS(node_i, nimg, refos_img)
2716         self._VerifyNodeBridges(node_i, nresult, bridges)
2717
2718         # Check whether all running instancies are primary for the node. (This
2719         # can no longer be done from _VerifyInstance below, since some of the
2720         # wrong instances could be from other node groups.)
2721         non_primary_inst = set(nimg.instances).difference(nimg.pinst)
2722
2723         for inst in non_primary_inst:
2724           test = inst in self.all_inst_info
2725           _ErrorIf(test, self.EINSTANCEWRONGNODE, inst,
2726                    "instance should not run on node %s", node_i.name)
2727           _ErrorIf(not test, self.ENODEORPHANINSTANCE, node_i.name,
2728                    "node is running unknown instance %s", inst)
2729
2730     for node, result in extra_lv_nvinfo.items():
2731       self._UpdateNodeVolumes(self.all_node_info[node], result.payload,
2732                               node_image[node], vg_name)
2733
2734     feedback_fn("* Verifying instance status")
2735     for instance in self.my_inst_names:
2736       if verbose:
2737         feedback_fn("* Verifying instance %s" % instance)
2738       inst_config = self.my_inst_info[instance]
2739       self._VerifyInstance(instance, inst_config, node_image,
2740                            instdisk[instance])
2741       inst_nodes_offline = []
2742
2743       pnode = inst_config.primary_node
2744       pnode_img = node_image[pnode]
2745       _ErrorIf(pnode_img.rpc_fail and not pnode_img.offline,
2746                self.ENODERPC, pnode, "instance %s, connection to"
2747                " primary node failed", instance)
2748
2749       _ErrorIf(inst_config.admin_up and pnode_img.offline,
2750                self.EINSTANCEBADNODE, instance,
2751                "instance is marked as running and lives on offline node %s",
2752                inst_config.primary_node)
2753
2754       # If the instance is non-redundant we cannot survive losing its primary
2755       # node, so we are not N+1 compliant. On the other hand we have no disk
2756       # templates with more than one secondary so that situation is not well
2757       # supported either.
2758       # FIXME: does not support file-backed instances
2759       if not inst_config.secondary_nodes:
2760         i_non_redundant.append(instance)
2761
2762       _ErrorIf(len(inst_config.secondary_nodes) > 1, self.EINSTANCELAYOUT,
2763                instance, "instance has multiple secondary nodes: %s",
2764                utils.CommaJoin(inst_config.secondary_nodes),
2765                code=self.ETYPE_WARNING)
2766
2767       if inst_config.disk_template in constants.DTS_INT_MIRROR:
2768         pnode = inst_config.primary_node
2769         instance_nodes = utils.NiceSort(inst_config.all_nodes)
2770         instance_groups = {}
2771
2772         for node in instance_nodes:
2773           instance_groups.setdefault(self.all_node_info[node].group,
2774                                      []).append(node)
2775
2776         pretty_list = [
2777           "%s (group %s)" % (utils.CommaJoin(nodes), groupinfo[group].name)
2778           # Sort so that we always list the primary node first.
2779           for group, nodes in sorted(instance_groups.items(),
2780                                      key=lambda (_, nodes): pnode in nodes,
2781                                      reverse=True)]
2782
2783         self._ErrorIf(len(instance_groups) > 1, self.EINSTANCESPLITGROUPS,
2784                       instance, "instance has primary and secondary nodes in"
2785                       " different groups: %s", utils.CommaJoin(pretty_list),
2786                       code=self.ETYPE_WARNING)
2787
2788       if not cluster.FillBE(inst_config)[constants.BE_AUTO_BALANCE]:
2789         i_non_a_balanced.append(instance)
2790
2791       for snode in inst_config.secondary_nodes:
2792         s_img = node_image[snode]
2793         _ErrorIf(s_img.rpc_fail and not s_img.offline, self.ENODERPC, snode,
2794                  "instance %s, connection to secondary node failed", instance)
2795
2796         if s_img.offline:
2797           inst_nodes_offline.append(snode)
2798
2799       # warn that the instance lives on offline nodes
2800       _ErrorIf(inst_nodes_offline, self.EINSTANCEBADNODE, instance,
2801                "instance has offline secondary node(s) %s",
2802                utils.CommaJoin(inst_nodes_offline))
2803       # ... or ghost/non-vm_capable nodes
2804       for node in inst_config.all_nodes:
2805         _ErrorIf(node_image[node].ghost, self.EINSTANCEBADNODE, instance,
2806                  "instance lives on ghost node %s", node)
2807         _ErrorIf(not node_image[node].vm_capable, self.EINSTANCEBADNODE,
2808                  instance, "instance lives on non-vm_capable node %s", node)
2809
2810     feedback_fn("* Verifying orphan volumes")
2811     reserved = utils.FieldSet(*cluster.reserved_lvs)
2812
2813     # We will get spurious "unknown volume" warnings if any node of this group
2814     # is secondary for an instance whose primary is in another group. To avoid
2815     # them, we find these instances and add their volumes to node_vol_should.
2816     for inst in self.all_inst_info.values():
2817       for secondary in inst.secondary_nodes:
2818         if (secondary in self.my_node_info
2819             and inst.name not in self.my_inst_info):
2820           inst.MapLVsByNode(node_vol_should)
2821           break
2822
2823     self._VerifyOrphanVolumes(node_vol_should, node_image, reserved)
2824
2825     if constants.VERIFY_NPLUSONE_MEM not in self.op.skip_checks:
2826       feedback_fn("* Verifying N+1 Memory redundancy")
2827       self._VerifyNPlusOneMemory(node_image, self.my_inst_info)
2828
2829     feedback_fn("* Other Notes")
2830     if i_non_redundant:
2831       feedback_fn("  - NOTICE: %d non-redundant instance(s) found."
2832                   % len(i_non_redundant))
2833
2834     if i_non_a_balanced:
2835       feedback_fn("  - NOTICE: %d non-auto-balanced instance(s) found."
2836                   % len(i_non_a_balanced))
2837
2838     if n_offline:
2839       feedback_fn("  - NOTICE: %d offline node(s) found." % n_offline)
2840
2841     if n_drained:
2842       feedback_fn("  - NOTICE: %d drained node(s) found." % n_drained)
2843
2844     return not self.bad
2845
2846   def HooksCallBack(self, phase, hooks_results, feedback_fn, lu_result):
2847     """Analyze the post-hooks' result
2848
2849     This method analyses the hook result, handles it, and sends some
2850     nicely-formatted feedback back to the user.
2851
2852     @param phase: one of L{constants.HOOKS_PHASE_POST} or
2853         L{constants.HOOKS_PHASE_PRE}; it denotes the hooks phase
2854     @param hooks_results: the results of the multi-node hooks rpc call
2855     @param feedback_fn: function used send feedback back to the caller
2856     @param lu_result: previous Exec result
2857     @return: the new Exec result, based on the previous result
2858         and hook results
2859
2860     """
2861     # We only really run POST phase hooks, only for non-empty groups,
2862     # and are only interested in their results
2863     if not self.my_node_names:
2864       # empty node group
2865       pass
2866     elif phase == constants.HOOKS_PHASE_POST:
2867       # Used to change hooks' output to proper indentation
2868       feedback_fn("* Hooks Results")
2869       assert hooks_results, "invalid result from hooks"
2870
2871       for node_name in hooks_results:
2872         res = hooks_results[node_name]
2873         msg = res.fail_msg
2874         test = msg and not res.offline
2875         self._ErrorIf(test, self.ENODEHOOKS, node_name,
2876                       "Communication failure in hooks execution: %s", msg)
2877         if res.offline or msg:
2878           # No need to investigate payload if node is offline or gave an error.
2879           # override manually lu_result here as _ErrorIf only
2880           # overrides self.bad
2881           lu_result = 1
2882           continue
2883         for script, hkr, output in res.payload:
2884           test = hkr == constants.HKR_FAIL
2885           self._ErrorIf(test, self.ENODEHOOKS, node_name,
2886                         "Script %s failed, output:", script)
2887           if test:
2888             output = self._HOOKS_INDENT_RE.sub("      ", output)
2889             feedback_fn("%s" % output)
2890             lu_result = 0
2891
2892     return lu_result
2893
2894
2895 class LUClusterVerifyDisks(NoHooksLU):
2896   """Verifies the cluster disks status.
2897
2898   """
2899   REQ_BGL = False
2900
2901   def ExpandNames(self):
2902     self.share_locks = _ShareAll()
2903     self.needed_locks = {
2904       locking.LEVEL_NODEGROUP: locking.ALL_SET,
2905       }
2906
2907   def Exec(self, feedback_fn):
2908     group_names = self.glm.list_owned(locking.LEVEL_NODEGROUP)
2909
2910     # Submit one instance of L{opcodes.OpGroupVerifyDisks} per node group
2911     return ResultWithJobs([[opcodes.OpGroupVerifyDisks(group_name=group)]
2912                            for group in group_names])
2913
2914
2915 class LUGroupVerifyDisks(NoHooksLU):
2916   """Verifies the status of all disks in a node group.
2917
2918   """
2919   REQ_BGL = False
2920
2921   def ExpandNames(self):
2922     # Raises errors.OpPrereqError on its own if group can't be found
2923     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
2924
2925     self.share_locks = _ShareAll()
2926     self.needed_locks = {
2927       locking.LEVEL_INSTANCE: [],
2928       locking.LEVEL_NODEGROUP: [],
2929       locking.LEVEL_NODE: [],
2930       }
2931
2932   def DeclareLocks(self, level):
2933     if level == locking.LEVEL_INSTANCE:
2934       assert not self.needed_locks[locking.LEVEL_INSTANCE]
2935
2936       # Lock instances optimistically, needs verification once node and group
2937       # locks have been acquired
2938       self.needed_locks[locking.LEVEL_INSTANCE] = \
2939         self.cfg.GetNodeGroupInstances(self.group_uuid)
2940
2941     elif level == locking.LEVEL_NODEGROUP:
2942       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
2943
2944       self.needed_locks[locking.LEVEL_NODEGROUP] = \
2945         set([self.group_uuid] +
2946             # Lock all groups used by instances optimistically; this requires
2947             # going via the node before it's locked, requiring verification
2948             # later on
2949             [group_uuid
2950              for instance_name in
2951                self.glm.list_owned(locking.LEVEL_INSTANCE)
2952              for group_uuid in
2953                self.cfg.GetInstanceNodeGroups(instance_name)])
2954
2955     elif level == locking.LEVEL_NODE:
2956       # This will only lock the nodes in the group to be verified which contain
2957       # actual instances
2958       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
2959       self._LockInstancesNodes()
2960
2961       # Lock all nodes in group to be verified
2962       assert self.group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
2963       member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
2964       self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
2965
2966   def CheckPrereq(self):
2967     owned_instances = frozenset(self.glm.list_owned(locking.LEVEL_INSTANCE))
2968     owned_groups = frozenset(self.glm.list_owned(locking.LEVEL_NODEGROUP))
2969     owned_nodes = frozenset(self.glm.list_owned(locking.LEVEL_NODE))
2970
2971     assert self.group_uuid in owned_groups
2972
2973     # Check if locked instances are still correct
2974     wanted_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
2975     if owned_instances != wanted_instances:
2976       raise errors.OpPrereqError("Instances in node group %s changed since"
2977                                  " locks were acquired, wanted %s, have %s;"
2978                                  " retry the operation" %
2979                                  (self.op.group_name,
2980                                   utils.CommaJoin(wanted_instances),
2981                                   utils.CommaJoin(owned_instances)),
2982                                  errors.ECODE_STATE)
2983
2984     # Get instance information
2985     self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
2986
2987     # Check if node groups for locked instances are still correct
2988     for (instance_name, inst) in self.instances.items():
2989       assert self.group_uuid in self.cfg.GetInstanceNodeGroups(instance_name), \
2990         "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
2991       assert owned_nodes.issuperset(inst.all_nodes), \
2992         "Instance %s's nodes changed while we kept the lock" % instance_name
2993
2994       inst_groups = self.cfg.GetInstanceNodeGroups(instance_name)
2995       if not owned_groups.issuperset(inst_groups):
2996         raise errors.OpPrereqError("Instance %s's node groups changed since"
2997                                    " locks were acquired, current groups are"
2998                                    " are '%s', owning groups '%s'; retry the"
2999                                    " operation" %
3000                                    (instance_name,
3001                                     utils.CommaJoin(inst_groups),
3002                                     utils.CommaJoin(owned_groups)),
3003                                    errors.ECODE_STATE)
3004
3005   def Exec(self, feedback_fn):
3006     """Verify integrity of cluster disks.
3007
3008     @rtype: tuple of three items
3009     @return: a tuple of (dict of node-to-node_error, list of instances
3010         which need activate-disks, dict of instance: (node, volume) for
3011         missing volumes
3012
3013     """
3014     res_nodes = {}
3015     res_instances = set()
3016     res_missing = {}
3017
3018     nv_dict = _MapInstanceDisksToNodes([inst
3019                                         for inst in self.instances.values()
3020                                         if inst.admin_up])
3021
3022     if nv_dict:
3023       nodes = utils.NiceSort(set(self.glm.list_owned(locking.LEVEL_NODE)) &
3024                              set(self.cfg.GetVmCapableNodeList()))
3025
3026       node_lvs = self.rpc.call_lv_list(nodes, [])
3027
3028       for (node, node_res) in node_lvs.items():
3029         if node_res.offline:
3030           continue
3031
3032         msg = node_res.fail_msg
3033         if msg:
3034           logging.warning("Error enumerating LVs on node %s: %s", node, msg)
3035           res_nodes[node] = msg
3036           continue
3037
3038         for lv_name, (_, _, lv_online) in node_res.payload.items():
3039           inst = nv_dict.pop((node, lv_name), None)
3040           if not (lv_online or inst is None):
3041             res_instances.add(inst)
3042
3043       # any leftover items in nv_dict are missing LVs, let's arrange the data
3044       # better
3045       for key, inst in nv_dict.iteritems():
3046         res_missing.setdefault(inst, []).append(key)
3047
3048     return (res_nodes, list(res_instances), res_missing)
3049
3050
3051 class LUClusterRepairDiskSizes(NoHooksLU):
3052   """Verifies the cluster disks sizes.
3053
3054   """
3055   REQ_BGL = False
3056
3057   def ExpandNames(self):
3058     if self.op.instances:
3059       self.wanted_names = _GetWantedInstances(self, self.op.instances)
3060       self.needed_locks = {
3061         locking.LEVEL_NODE: [],
3062         locking.LEVEL_INSTANCE: self.wanted_names,
3063         }
3064       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
3065     else:
3066       self.wanted_names = None
3067       self.needed_locks = {
3068         locking.LEVEL_NODE: locking.ALL_SET,
3069         locking.LEVEL_INSTANCE: locking.ALL_SET,
3070         }
3071     self.share_locks = _ShareAll()
3072
3073   def DeclareLocks(self, level):
3074     if level == locking.LEVEL_NODE and self.wanted_names is not None:
3075       self._LockInstancesNodes(primary_only=True)
3076
3077   def CheckPrereq(self):
3078     """Check prerequisites.
3079
3080     This only checks the optional instance list against the existing names.
3081
3082     """
3083     if self.wanted_names is None:
3084       self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)
3085
3086     self.wanted_instances = \
3087         map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
3088
3089   def _EnsureChildSizes(self, disk):
3090     """Ensure children of the disk have the needed disk size.
3091
3092     This is valid mainly for DRBD8 and fixes an issue where the
3093     children have smaller disk size.
3094
3095     @param disk: an L{ganeti.objects.Disk} object
3096
3097     """
3098     if disk.dev_type == constants.LD_DRBD8:
3099       assert disk.children, "Empty children for DRBD8?"
3100       fchild = disk.children[0]
3101       mismatch = fchild.size < disk.size
3102       if mismatch:
3103         self.LogInfo("Child disk has size %d, parent %d, fixing",
3104                      fchild.size, disk.size)
3105         fchild.size = disk.size
3106
3107       # and we recurse on this child only, not on the metadev
3108       return self._EnsureChildSizes(fchild) or mismatch
3109     else:
3110       return False
3111
3112   def Exec(self, feedback_fn):
3113     """Verify the size of cluster disks.
3114
3115     """
3116     # TODO: check child disks too
3117     # TODO: check differences in size between primary/secondary nodes
3118     per_node_disks = {}
3119     for instance in self.wanted_instances:
3120       pnode = instance.primary_node
3121       if pnode not in per_node_disks:
3122         per_node_disks[pnode] = []
3123       for idx, disk in enumerate(instance.disks):
3124         per_node_disks[pnode].append((instance, idx, disk))
3125
3126     changed = []
3127     for node, dskl in per_node_disks.items():
3128       newl = [v[2].Copy() for v in dskl]
3129       for dsk in newl:
3130         self.cfg.SetDiskID(dsk, node)
3131       result = self.rpc.call_blockdev_getsize(node, newl)
3132       if result.fail_msg:
3133         self.LogWarning("Failure in blockdev_getsize call to node"
3134                         " %s, ignoring", node)
3135         continue
3136       if len(result.payload) != len(dskl):
3137         logging.warning("Invalid result from node %s: len(dksl)=%d,"
3138                         " result.payload=%s", node, len(dskl), result.payload)
3139         self.LogWarning("Invalid result from node %s, ignoring node results",
3140                         node)
3141         continue
3142       for ((instance, idx, disk), size) in zip(dskl, result.payload):
3143         if size is None:
3144           self.LogWarning("Disk %d of instance %s did not return size"
3145                           " information, ignoring", idx, instance.name)
3146           continue
3147         if not isinstance(size, (int, long)):
3148           self.LogWarning("Disk %d of instance %s did not return valid"
3149                           " size information, ignoring", idx, instance.name)
3150           continue
3151         size = size >> 20
3152         if size != disk.size:
3153           self.LogInfo("Disk %d of instance %s has mismatched size,"
3154                        " correcting: recorded %d, actual %d", idx,
3155                        instance.name, disk.size, size)
3156           disk.size = size
3157           self.cfg.Update(instance, feedback_fn)
3158           changed.append((instance.name, idx, size))
3159         if self._EnsureChildSizes(disk):
3160           self.cfg.Update(instance, feedback_fn)
3161           changed.append((instance.name, idx, disk.size))
3162     return changed
3163
3164
3165 class LUClusterRename(LogicalUnit):
3166   """Rename the cluster.
3167
3168   """
3169   HPATH = "cluster-rename"
3170   HTYPE = constants.HTYPE_CLUSTER
3171
3172   def BuildHooksEnv(self):
3173     """Build hooks env.
3174
3175     """
3176     return {
3177       "OP_TARGET": self.cfg.GetClusterName(),
3178       "NEW_NAME": self.op.name,
3179       }
3180
3181   def BuildHooksNodes(self):
3182     """Build hooks nodes.
3183
3184     """
3185     return ([self.cfg.GetMasterNode()], self.cfg.GetNodeList())
3186
3187   def CheckPrereq(self):
3188     """Verify that the passed name is a valid one.
3189
3190     """
3191     hostname = netutils.GetHostname(name=self.op.name,
3192                                     family=self.cfg.GetPrimaryIPFamily())
3193
3194     new_name = hostname.name
3195     self.ip = new_ip = hostname.ip
3196     old_name = self.cfg.GetClusterName()
3197     old_ip = self.cfg.GetMasterIP()
3198     if new_name == old_name and new_ip == old_ip:
3199       raise errors.OpPrereqError("Neither the name nor the IP address of the"
3200                                  " cluster has changed",
3201                                  errors.ECODE_INVAL)
3202     if new_ip != old_ip:
3203       if netutils.TcpPing(new_ip, constants.DEFAULT_NODED_PORT):
3204         raise errors.OpPrereqError("The given cluster IP address (%s) is"
3205                                    " reachable on the network" %
3206                                    new_ip, errors.ECODE_NOTUNIQUE)
3207
3208     self.op.name = new_name
3209
3210   def Exec(self, feedback_fn):
3211     """Rename the cluster.
3212
3213     """
3214     clustername = self.op.name
3215     ip = self.ip
3216
3217     # shutdown the master IP
3218     master = self.cfg.GetMasterNode()
3219     result = self.rpc.call_node_stop_master(master, False)
3220     result.Raise("Could not disable the master role")
3221
3222     try:
3223       cluster = self.cfg.GetClusterInfo()
3224       cluster.cluster_name = clustername
3225       cluster.master_ip = ip
3226       self.cfg.Update(cluster, feedback_fn)
3227
3228       # update the known hosts file
3229       ssh.WriteKnownHostsFile(self.cfg, constants.SSH_KNOWN_HOSTS_FILE)
3230       node_list = self.cfg.GetOnlineNodeList()
3231       try:
3232         node_list.remove(master)
3233       except ValueError:
3234         pass
3235       _UploadHelper(self, node_list, constants.SSH_KNOWN_HOSTS_FILE)
3236     finally:
3237       result = self.rpc.call_node_start_master(master, False, False)
3238       msg = result.fail_msg
3239       if msg:
3240         self.LogWarning("Could not re-enable the master role on"
3241                         " the master, please restart manually: %s", msg)
3242
3243     return clustername
3244
3245
3246 class LUClusterSetParams(LogicalUnit):
3247   """Change the parameters of the cluster.
3248
3249   """
3250   HPATH = "cluster-modify"
3251   HTYPE = constants.HTYPE_CLUSTER
3252   REQ_BGL = False
3253
3254   def CheckArguments(self):
3255     """Check parameters
3256
3257     """
3258     if self.op.uid_pool:
3259       uidpool.CheckUidPool(self.op.uid_pool)
3260
3261     if self.op.add_uids:
3262       uidpool.CheckUidPool(self.op.add_uids)
3263
3264     if self.op.remove_uids:
3265       uidpool.CheckUidPool(self.op.remove_uids)
3266
3267   def ExpandNames(self):
3268     # FIXME: in the future maybe other cluster params won't require checking on
3269     # all nodes to be modified.
3270     self.needed_locks = {
3271       locking.LEVEL_NODE: locking.ALL_SET,
3272     }
3273     self.share_locks[locking.LEVEL_NODE] = 1
3274
3275   def BuildHooksEnv(self):
3276     """Build hooks env.
3277
3278     """
3279     return {
3280       "OP_TARGET": self.cfg.GetClusterName(),
3281       "NEW_VG_NAME": self.op.vg_name,
3282       }
3283
3284   def BuildHooksNodes(self):
3285     """Build hooks nodes.
3286
3287     """
3288     mn = self.cfg.GetMasterNode()
3289     return ([mn], [mn])
3290
3291   def CheckPrereq(self):
3292     """Check prerequisites.
3293
3294     This checks whether the given params don't conflict and
3295     if the given volume group is valid.
3296
3297     """
3298     if self.op.vg_name is not None and not self.op.vg_name:
3299       if self.cfg.HasAnyDiskOfType(constants.LD_LV):
3300         raise errors.OpPrereqError("Cannot disable lvm storage while lvm-based"
3301                                    " instances exist", errors.ECODE_INVAL)
3302
3303     if self.op.drbd_helper is not None and not self.op.drbd_helper:
3304       if self.cfg.HasAnyDiskOfType(constants.LD_DRBD8):
3305         raise errors.OpPrereqError("Cannot disable drbd helper while"
3306                                    " drbd-based instances exist",
3307                                    errors.ECODE_INVAL)
3308
3309     node_list = self.glm.list_owned(locking.LEVEL_NODE)
3310
3311     # if vg_name not None, checks given volume group on all nodes
3312     if self.op.vg_name:
3313       vglist = self.rpc.call_vg_list(node_list)
3314       for node in node_list:
3315         msg = vglist[node].fail_msg
3316         if msg:
3317           # ignoring down node
3318           self.LogWarning("Error while gathering data on node %s"
3319                           " (ignoring node): %s", node, msg)
3320           continue
3321         vgstatus = utils.CheckVolumeGroupSize(vglist[node].payload,
3322                                               self.op.vg_name,
3323                                               constants.MIN_VG_SIZE)
3324         if vgstatus:
3325           raise errors.OpPrereqError("Error on node '%s': %s" %
3326                                      (node, vgstatus), errors.ECODE_ENVIRON)
3327
3328     if self.op.drbd_helper:
3329       # checks given drbd helper on all nodes
3330       helpers = self.rpc.call_drbd_helper(node_list)
3331       for (node, ninfo) in self.cfg.GetMultiNodeInfo(node_list):
3332         if ninfo.offline:
3333           self.LogInfo("Not checking drbd helper on offline node %s", node)
3334           continue
3335         msg = helpers[node].fail_msg
3336         if msg:
3337           raise errors.OpPrereqError("Error checking drbd helper on node"
3338                                      " '%s': %s" % (node, msg),
3339                                      errors.ECODE_ENVIRON)
3340         node_helper = helpers[node].payload
3341         if node_helper != self.op.drbd_helper:
3342           raise errors.OpPrereqError("Error on node '%s': drbd helper is %s" %
3343                                      (node, node_helper), errors.ECODE_ENVIRON)
3344
3345     self.cluster = cluster = self.cfg.GetClusterInfo()
3346     # validate params changes
3347     if self.op.beparams:
3348       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
3349       self.new_beparams = cluster.SimpleFillBE(self.op.beparams)
3350
3351     if self.op.ndparams:
3352       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
3353       self.new_ndparams = cluster.SimpleFillND(self.op.ndparams)
3354
3355       # TODO: we need a more general way to handle resetting
3356       # cluster-level parameters to default values
3357       if self.new_ndparams["oob_program"] == "":
3358         self.new_ndparams["oob_program"] = \
3359             constants.NDC_DEFAULTS[constants.ND_OOB_PROGRAM]
3360
3361     if self.op.nicparams:
3362       utils.ForceDictType(self.op.nicparams, constants.NICS_PARAMETER_TYPES)
3363       self.new_nicparams = cluster.SimpleFillNIC(self.op.nicparams)
3364       objects.NIC.CheckParameterSyntax(self.new_nicparams)
3365       nic_errors = []
3366
3367       # check all instances for consistency
3368       for instance in self.cfg.GetAllInstancesInfo().values():
3369         for nic_idx, nic in enumerate(instance.nics):
3370           params_copy = copy.deepcopy(nic.nicparams)
3371           params_filled = objects.FillDict(self.new_nicparams, params_copy)
3372
3373           # check parameter syntax
3374           try:
3375             objects.NIC.CheckParameterSyntax(params_filled)
3376           except errors.ConfigurationError, err:
3377             nic_errors.append("Instance %s, nic/%d: %s" %
3378                               (instance.name, nic_idx, err))
3379
3380           # if we're moving instances to routed, check that they have an ip
3381           target_mode = params_filled[constants.NIC_MODE]
3382           if target_mode == constants.NIC_MODE_ROUTED and not nic.ip:
3383             nic_errors.append("Instance %s, nic/%d: routed NIC with no ip"
3384                               " address" % (instance.name, nic_idx))
3385       if nic_errors:
3386         raise errors.OpPrereqError("Cannot apply the change, errors:\n%s" %
3387                                    "\n".join(nic_errors))
3388
3389     # hypervisor list/parameters
3390     self.new_hvparams = new_hvp = objects.FillDict(cluster.hvparams, {})
3391     if self.op.hvparams:
3392       for hv_name, hv_dict in self.op.hvparams.items():
3393         if hv_name not in self.new_hvparams:
3394           self.new_hvparams[hv_name] = hv_dict
3395         else:
3396           self.new_hvparams[hv_name].update(hv_dict)
3397
3398     # os hypervisor parameters
3399     self.new_os_hvp = objects.FillDict(cluster.os_hvp, {})
3400     if self.op.os_hvp:
3401       for os_name, hvs in self.op.os_hvp.items():
3402         if os_name not in self.new_os_hvp:
3403           self.new_os_hvp[os_name] = hvs
3404         else:
3405           for hv_name, hv_dict in hvs.items():
3406             if hv_name not in self.new_os_hvp[os_name]:
3407               self.new_os_hvp[os_name][hv_name] = hv_dict
3408             else:
3409               self.new_os_hvp[os_name][hv_name].update(hv_dict)
3410
3411     # os parameters
3412     self.new_osp = objects.FillDict(cluster.osparams, {})
3413     if self.op.osparams:
3414       for os_name, osp in self.op.osparams.items():
3415         if os_name not in self.new_osp:
3416           self.new_osp[os_name] = {}
3417
3418         self.new_osp[os_name] = _GetUpdatedParams(self.new_osp[os_name], osp,
3419                                                   use_none=True)
3420
3421         if not self.new_osp[os_name]:
3422           # we removed all parameters
3423           del self.new_osp[os_name]
3424         else:
3425           # check the parameter validity (remote check)
3426           _CheckOSParams(self, False, [self.cfg.GetMasterNode()],
3427                          os_name, self.new_osp[os_name])
3428
3429     # changes to the hypervisor list
3430     if self.op.enabled_hypervisors is not None:
3431       self.hv_list = self.op.enabled_hypervisors
3432       for hv in self.hv_list:
3433         # if the hypervisor doesn't already exist in the cluster
3434         # hvparams, we initialize it to empty, and then (in both
3435         # cases) we make sure to fill the defaults, as we might not
3436         # have a complete defaults list if the hypervisor wasn't
3437         # enabled before
3438         if hv not in new_hvp:
3439           new_hvp[hv] = {}
3440         new_hvp[hv] = objects.FillDict(constants.HVC_DEFAULTS[hv], new_hvp[hv])
3441         utils.ForceDictType(new_hvp[hv], constants.HVS_PARAMETER_TYPES)
3442     else:
3443       self.hv_list = cluster.enabled_hypervisors
3444
3445     if self.op.hvparams or self.op.enabled_hypervisors is not None:
3446       # either the enabled list has changed, or the parameters have, validate
3447       for hv_name, hv_params in self.new_hvparams.items():
3448         if ((self.op.hvparams and hv_name in self.op.hvparams) or
3449             (self.op.enabled_hypervisors and
3450              hv_name in self.op.enabled_hypervisors)):
3451           # either this is a new hypervisor, or its parameters have changed
3452           hv_class = hypervisor.GetHypervisor(hv_name)
3453           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3454           hv_class.CheckParameterSyntax(hv_params)
3455           _CheckHVParams(self, node_list, hv_name, hv_params)
3456
3457     if self.op.os_hvp:
3458       # no need to check any newly-enabled hypervisors, since the
3459       # defaults have already been checked in the above code-block
3460       for os_name, os_hvp in self.new_os_hvp.items():
3461         for hv_name, hv_params in os_hvp.items():
3462           utils.ForceDictType(hv_params, constants.HVS_PARAMETER_TYPES)
3463           # we need to fill in the new os_hvp on top of the actual hv_p
3464           cluster_defaults = self.new_hvparams.get(hv_name, {})
3465           new_osp = objects.FillDict(cluster_defaults, hv_params)
3466           hv_class = hypervisor.GetHypervisor(hv_name)
3467           hv_class.CheckParameterSyntax(new_osp)
3468           _CheckHVParams(self, node_list, hv_name, new_osp)
3469
3470     if self.op.default_iallocator:
3471       alloc_script = utils.FindFile(self.op.default_iallocator,
3472                                     constants.IALLOCATOR_SEARCH_PATH,
3473                                     os.path.isfile)
3474       if alloc_script is None:
3475         raise errors.OpPrereqError("Invalid default iallocator script '%s'"
3476                                    " specified" % self.op.default_iallocator,
3477                                    errors.ECODE_INVAL)
3478
3479   def Exec(self, feedback_fn):
3480     """Change the parameters of the cluster.
3481
3482     """
3483     if self.op.vg_name is not None:
3484       new_volume = self.op.vg_name
3485       if not new_volume:
3486         new_volume = None
3487       if new_volume != self.cfg.GetVGName():
3488         self.cfg.SetVGName(new_volume)
3489       else:
3490         feedback_fn("Cluster LVM configuration already in desired"
3491                     " state, not changing")
3492     if self.op.drbd_helper is not None:
3493       new_helper = self.op.drbd_helper
3494       if not new_helper:
3495         new_helper = None
3496       if new_helper != self.cfg.GetDRBDHelper():
3497         self.cfg.SetDRBDHelper(new_helper)
3498       else:
3499         feedback_fn("Cluster DRBD helper already in desired state,"
3500                     " not changing")
3501     if self.op.hvparams:
3502       self.cluster.hvparams = self.new_hvparams
3503     if self.op.os_hvp:
3504       self.cluster.os_hvp = self.new_os_hvp
3505     if self.op.enabled_hypervisors is not None:
3506       self.cluster.hvparams = self.new_hvparams
3507       self.cluster.enabled_hypervisors = self.op.enabled_hypervisors
3508     if self.op.beparams:
3509       self.cluster.beparams[constants.PP_DEFAULT] = self.new_beparams
3510     if self.op.nicparams:
3511       self.cluster.nicparams[constants.PP_DEFAULT] = self.new_nicparams
3512     if self.op.osparams:
3513       self.cluster.osparams = self.new_osp
3514     if self.op.ndparams:
3515       self.cluster.ndparams = self.new_ndparams
3516
3517     if self.op.candidate_pool_size is not None:
3518       self.cluster.candidate_pool_size = self.op.candidate_pool_size
3519       # we need to update the pool size here, otherwise the save will fail
3520       _AdjustCandidatePool(self, [])
3521
3522     if self.op.maintain_node_health is not None:
3523       self.cluster.maintain_node_health = self.op.maintain_node_health
3524
3525     if self.op.prealloc_wipe_disks is not None:
3526       self.cluster.prealloc_wipe_disks = self.op.prealloc_wipe_disks
3527
3528     if self.op.add_uids is not None:
3529       uidpool.AddToUidPool(self.cluster.uid_pool, self.op.add_uids)
3530
3531     if self.op.remove_uids is not None:
3532       uidpool.RemoveFromUidPool(self.cluster.uid_pool, self.op.remove_uids)
3533
3534     if self.op.uid_pool is not None:
3535       self.cluster.uid_pool = self.op.uid_pool
3536
3537     if self.op.default_iallocator is not None:
3538       self.cluster.default_iallocator = self.op.default_iallocator
3539
3540     if self.op.reserved_lvs is not None:
3541       self.cluster.reserved_lvs = self.op.reserved_lvs
3542
3543     def helper_os(aname, mods, desc):
3544       desc += " OS list"
3545       lst = getattr(self.cluster, aname)
3546       for key, val in mods:
3547         if key == constants.DDM_ADD:
3548           if val in lst:
3549             feedback_fn("OS %s already in %s, ignoring" % (val, desc))
3550           else:
3551             lst.append(val)
3552         elif key == constants.DDM_REMOVE:
3553           if val in lst:
3554             lst.remove(val)
3555           else:
3556             feedback_fn("OS %s not found in %s, ignoring" % (val, desc))
3557         else:
3558           raise errors.ProgrammerError("Invalid modification '%s'" % key)
3559
3560     if self.op.hidden_os:
3561       helper_os("hidden_os", self.op.hidden_os, "hidden")
3562
3563     if self.op.blacklisted_os:
3564       helper_os("blacklisted_os", self.op.blacklisted_os, "blacklisted")
3565
3566     if self.op.master_netdev:
3567       master = self.cfg.GetMasterNode()
3568       feedback_fn("Shutting down master ip on the current netdev (%s)" %
3569                   self.cluster.master_netdev)
3570       result = self.rpc.call_node_stop_master(master, False)
3571       result.Raise("Could not disable the master ip")
3572       feedback_fn("Changing master_netdev from %s to %s" %
3573                   (self.cluster.master_netdev, self.op.master_netdev))
3574       self.cluster.master_netdev = self.op.master_netdev
3575
3576     self.cfg.Update(self.cluster, feedback_fn)
3577
3578     if self.op.master_netdev:
3579       feedback_fn("Starting the master ip on the new master netdev (%s)" %
3580                   self.op.master_netdev)
3581       result = self.rpc.call_node_start_master(master, False, False)
3582       if result.fail_msg:
3583         self.LogWarning("Could not re-enable the master ip on"
3584                         " the master, please restart manually: %s",
3585                         result.fail_msg)
3586
3587
3588 def _UploadHelper(lu, nodes, fname):
3589   """Helper for uploading a file and showing warnings.
3590
3591   """
3592   if os.path.exists(fname):
3593     result = lu.rpc.call_upload_file(nodes, fname)
3594     for to_node, to_result in result.items():
3595       msg = to_result.fail_msg
3596       if msg:
3597         msg = ("Copy of file %s to node %s failed: %s" %
3598                (fname, to_node, msg))
3599         lu.proc.LogWarning(msg)
3600
3601
3602 def _ComputeAncillaryFiles(cluster, redist):
3603   """Compute files external to Ganeti which need to be consistent.
3604
3605   @type redist: boolean
3606   @param redist: Whether to include files which need to be redistributed
3607
3608   """
3609   # Compute files for all nodes
3610   files_all = set([
3611     constants.SSH_KNOWN_HOSTS_FILE,
3612     constants.CONFD_HMAC_KEY,
3613     constants.CLUSTER_DOMAIN_SECRET_FILE,
3614     ])
3615
3616   if not redist:
3617     files_all.update(constants.ALL_CERT_FILES)
3618     files_all.update(ssconf.SimpleStore().GetFileList())
3619
3620   if cluster.modify_etc_hosts:
3621     files_all.add(constants.ETC_HOSTS)
3622
3623   # Files which must either exist on all nodes or on none
3624   files_all_opt = set([
3625     constants.RAPI_USERS_FILE,
3626     ])
3627
3628   # Files which should only be on master candidates
3629   files_mc = set()
3630   if not redist:
3631     files_mc.add(constants.CLUSTER_CONF_FILE)
3632
3633   # Files which should only be on VM-capable nodes
3634   files_vm = set(filename
3635     for hv_name in cluster.enabled_hypervisors
3636     for filename in hypervisor.GetHypervisor(hv_name).GetAncillaryFiles())
3637
3638   # Filenames must be unique
3639   assert (len(files_all | files_all_opt | files_mc | files_vm) ==
3640           sum(map(len, [files_all, files_all_opt, files_mc, files_vm]))), \
3641          "Found file listed in more than one file list"
3642
3643   return (files_all, files_all_opt, files_mc, files_vm)
3644
3645
3646 def _RedistributeAncillaryFiles(lu, additional_nodes=None, additional_vm=True):
3647   """Distribute additional files which are part of the cluster configuration.
3648
3649   ConfigWriter takes care of distributing the config and ssconf files, but
3650   there are more files which should be distributed to all nodes. This function
3651   makes sure those are copied.
3652
3653   @param lu: calling logical unit
3654   @param additional_nodes: list of nodes not in the config to distribute to
3655   @type additional_vm: boolean
3656   @param additional_vm: whether the additional nodes are vm-capable or not
3657
3658   """
3659   # Gather target nodes
3660   cluster = lu.cfg.GetClusterInfo()
3661   master_info = lu.cfg.GetNodeInfo(lu.cfg.GetMasterNode())
3662
3663   online_nodes = lu.cfg.GetOnlineNodeList()
3664   vm_nodes = lu.cfg.GetVmCapableNodeList()
3665
3666   if additional_nodes is not None:
3667     online_nodes.extend(additional_nodes)
3668     if additional_vm:
3669       vm_nodes.extend(additional_nodes)
3670
3671   # Never distribute to master node
3672   for nodelist in [online_nodes, vm_nodes]:
3673     if master_info.name in nodelist:
3674       nodelist.remove(master_info.name)
3675
3676   # Gather file lists
3677   (files_all, files_all_opt, files_mc, files_vm) = \
3678     _ComputeAncillaryFiles(cluster, True)
3679
3680   # Never re-distribute configuration file from here
3681   assert not (constants.CLUSTER_CONF_FILE in files_all or
3682               constants.CLUSTER_CONF_FILE in files_vm)
3683   assert not files_mc, "Master candidates not handled in this function"
3684
3685   filemap = [
3686     (online_nodes, files_all),
3687     (online_nodes, files_all_opt),
3688     (vm_nodes, files_vm),
3689     ]
3690
3691   # Upload the files
3692   for (node_list, files) in filemap:
3693     for fname in files:
3694       _UploadHelper(lu, node_list, fname)
3695
3696
3697 class LUClusterRedistConf(NoHooksLU):
3698   """Force the redistribution of cluster configuration.
3699
3700   This is a very simple LU.
3701
3702   """
3703   REQ_BGL = False
3704
3705   def ExpandNames(self):
3706     self.needed_locks = {
3707       locking.LEVEL_NODE: locking.ALL_SET,
3708     }
3709     self.share_locks[locking.LEVEL_NODE] = 1
3710
3711   def Exec(self, feedback_fn):
3712     """Redistribute the configuration.
3713
3714     """
3715     self.cfg.Update(self.cfg.GetClusterInfo(), feedback_fn)
3716     _RedistributeAncillaryFiles(self)
3717
3718
3719 def _WaitForSync(lu, instance, disks=None, oneshot=False):
3720   """Sleep and poll for an instance's disk to sync.
3721
3722   """
3723   if not instance.disks or disks is not None and not disks:
3724     return True
3725
3726   disks = _ExpandCheckDisks(instance, disks)
3727
3728   if not oneshot:
3729     lu.proc.LogInfo("Waiting for instance %s to sync disks." % instance.name)
3730
3731   node = instance.primary_node
3732
3733   for dev in disks:
3734     lu.cfg.SetDiskID(dev, node)
3735
3736   # TODO: Convert to utils.Retry
3737
3738   retries = 0
3739   degr_retries = 10 # in seconds, as we sleep 1 second each time
3740   while True:
3741     max_time = 0
3742     done = True
3743     cumul_degraded = False
3744     rstats = lu.rpc.call_blockdev_getmirrorstatus(node, disks)
3745     msg = rstats.fail_msg
3746     if msg:
3747       lu.LogWarning("Can't get any data from node %s: %s", node, msg)
3748       retries += 1
3749       if retries >= 10:
3750         raise errors.RemoteError("Can't contact node %s for mirror data,"
3751                                  " aborting." % node)
3752       time.sleep(6)
3753       continue
3754     rstats = rstats.payload
3755     retries = 0
3756     for i, mstat in enumerate(rstats):
3757       if mstat is None:
3758         lu.LogWarning("Can't compute data for node %s/%s",
3759                            node, disks[i].iv_name)
3760         continue
3761
3762       cumul_degraded = (cumul_degraded or
3763                         (mstat.is_degraded and mstat.sync_percent is None))
3764       if mstat.sync_percent is not None:
3765         done = False
3766         if mstat.estimated_time is not None:
3767           rem_time = ("%s remaining (estimated)" %
3768                       utils.FormatSeconds(mstat.estimated_time))
3769           max_time = mstat.estimated_time
3770         else:
3771           rem_time = "no time estimate"
3772         lu.proc.LogInfo("- device %s: %5.2f%% done, %s" %
3773                         (disks[i].iv_name, mstat.sync_percent, rem_time))
3774
3775     # if we're done but degraded, let's do a few small retries, to
3776     # make sure we see a stable and not transient situation; therefore
3777     # we force restart of the loop
3778     if (done or oneshot) and cumul_degraded and degr_retries > 0:
3779       logging.info("Degraded disks found, %d retries left", degr_retries)
3780       degr_retries -= 1
3781       time.sleep(1)
3782       continue
3783
3784     if done or oneshot:
3785       break
3786
3787     time.sleep(min(60, max_time))
3788
3789   if done:
3790     lu.proc.LogInfo("Instance %s's disks are in sync." % instance.name)
3791   return not cumul_degraded
3792
3793
3794 def _CheckDiskConsistency(lu, dev, node, on_primary, ldisk=False):
3795   """Check that mirrors are not degraded.
3796
3797   The ldisk parameter, if True, will change the test from the
3798   is_degraded attribute (which represents overall non-ok status for
3799   the device(s)) to the ldisk (representing the local storage status).
3800
3801   """
3802   lu.cfg.SetDiskID(dev, node)
3803
3804   result = True
3805
3806   if on_primary or dev.AssembleOnSecondary():
3807     rstats = lu.rpc.call_blockdev_find(node, dev)
3808     msg = rstats.fail_msg
3809     if msg:
3810       lu.LogWarning("Can't find disk on node %s: %s", node, msg)
3811       result = False
3812     elif not rstats.payload:
3813       lu.LogWarning("Can't find disk on node %s", node)
3814       result = False
3815     else:
3816       if ldisk:
3817         result = result and rstats.payload.ldisk_status == constants.LDS_OKAY
3818       else:
3819         result = result and not rstats.payload.is_degraded
3820
3821   if dev.children:
3822     for child in dev.children:
3823       result = result and _CheckDiskConsistency(lu, child, node, on_primary)
3824
3825   return result
3826
3827
3828 class LUOobCommand(NoHooksLU):
3829   """Logical unit for OOB handling.
3830
3831   """
3832   REG_BGL = False
3833   _SKIP_MASTER = (constants.OOB_POWER_OFF, constants.OOB_POWER_CYCLE)
3834
3835   def ExpandNames(self):
3836     """Gather locks we need.
3837
3838     """
3839     if self.op.node_names:
3840       self.op.node_names = _GetWantedNodes(self, self.op.node_names)
3841       lock_names = self.op.node_names
3842     else:
3843       lock_names = locking.ALL_SET
3844
3845     self.needed_locks = {
3846       locking.LEVEL_NODE: lock_names,
3847       }
3848
3849   def CheckPrereq(self):
3850     """Check prerequisites.
3851
3852     This checks:
3853      - the node exists in the configuration
3854      - OOB is supported
3855
3856     Any errors are signaled by raising errors.OpPrereqError.
3857
3858     """
3859     self.nodes = []
3860     self.master_node = self.cfg.GetMasterNode()
3861
3862     assert self.op.power_delay >= 0.0
3863
3864     if self.op.node_names:
3865       if (self.op.command in self._SKIP_MASTER and
3866           self.master_node in self.op.node_names):
3867         master_node_obj = self.cfg.GetNodeInfo(self.master_node)
3868         master_oob_handler = _SupportsOob(self.cfg, master_node_obj)
3869
3870         if master_oob_handler:
3871           additional_text = ("run '%s %s %s' if you want to operate on the"
3872                              " master regardless") % (master_oob_handler,
3873                                                       self.op.command,
3874                                                       self.master_node)
3875         else:
3876           additional_text = "it does not support out-of-band operations"
3877
3878         raise errors.OpPrereqError(("Operating on the master node %s is not"
3879                                     " allowed for %s; %s") %
3880                                    (self.master_node, self.op.command,
3881                                     additional_text), errors.ECODE_INVAL)
3882     else:
3883       self.op.node_names = self.cfg.GetNodeList()
3884       if self.op.command in self._SKIP_MASTER:
3885         self.op.node_names.remove(self.master_node)
3886
3887     if self.op.command in self._SKIP_MASTER:
3888       assert self.master_node not in self.op.node_names
3889
3890     for (node_name, node) in self.cfg.GetMultiNodeInfo(self.op.node_names):
3891       if node is None:
3892         raise errors.OpPrereqError("Node %s not found" % node_name,
3893                                    errors.ECODE_NOENT)
3894       else:
3895         self.nodes.append(node)
3896
3897       if (not self.op.ignore_status and
3898           (self.op.command == constants.OOB_POWER_OFF and not node.offline)):
3899         raise errors.OpPrereqError(("Cannot power off node %s because it is"
3900                                     " not marked offline") % node_name,
3901                                    errors.ECODE_STATE)
3902
3903   def Exec(self, feedback_fn):
3904     """Execute OOB and return result if we expect any.
3905
3906     """
3907     master_node = self.master_node
3908     ret = []
3909
3910     for idx, node in enumerate(utils.NiceSort(self.nodes,
3911                                               key=lambda node: node.name)):
3912       node_entry = [(constants.RS_NORMAL, node.name)]
3913       ret.append(node_entry)
3914
3915       oob_program = _SupportsOob(self.cfg, node)
3916
3917       if not oob_program:
3918         node_entry.append((constants.RS_UNAVAIL, None))
3919         continue
3920
3921       logging.info("Executing out-of-band command '%s' using '%s' on %s",
3922                    self.op.command, oob_program, node.name)
3923       result = self.rpc.call_run_oob(master_node, oob_program,
3924                                      self.op.command, node.name,
3925                                      self.op.timeout)
3926
3927       if result.fail_msg:
3928         self.LogWarning("Out-of-band RPC failed on node '%s': %s",
3929                         node.name, result.fail_msg)
3930         node_entry.append((constants.RS_NODATA, None))
3931       else:
3932         try:
3933           self._CheckPayload(result)
3934         except errors.OpExecError, err:
3935           self.LogWarning("Payload returned by node '%s' is not valid: %s",
3936                           node.name, err)
3937           node_entry.append((constants.RS_NODATA, None))
3938         else:
3939           if self.op.command == constants.OOB_HEALTH:
3940             # For health we should log important events
3941             for item, status in result.payload:
3942               if status in [constants.OOB_STATUS_WARNING,
3943                             constants.OOB_STATUS_CRITICAL]:
3944                 self.LogWarning("Item '%s' on node '%s' has status '%s'",
3945                                 item, node.name, status)
3946
3947           if self.op.command == constants.OOB_POWER_ON:
3948             node.powered = True
3949           elif self.op.command == constants.OOB_POWER_OFF:
3950             node.powered = False
3951           elif self.op.command == constants.OOB_POWER_STATUS:
3952             powered = result.payload[constants.OOB_POWER_STATUS_POWERED]
3953             if powered != node.powered:
3954               logging.warning(("Recorded power state (%s) of node '%s' does not"
3955                                " match actual power state (%s)"), node.powered,
3956                               node.name, powered)
3957
3958           # For configuration changing commands we should update the node
3959           if self.op.command in (constants.OOB_POWER_ON,
3960                                  constants.OOB_POWER_OFF):
3961             self.cfg.Update(node, feedback_fn)
3962
3963           node_entry.append((constants.RS_NORMAL, result.payload))
3964
3965           if (self.op.command == constants.OOB_POWER_ON and
3966               idx < len(self.nodes) - 1):
3967             time.sleep(self.op.power_delay)
3968
3969     return ret
3970
3971   def _CheckPayload(self, result):
3972     """Checks if the payload is valid.
3973
3974     @param result: RPC result
3975     @raises errors.OpExecError: If payload is not valid
3976
3977     """
3978     errs = []
3979     if self.op.command == constants.OOB_HEALTH:
3980       if not isinstance(result.payload, list):
3981         errs.append("command 'health' is expected to return a list but got %s" %
3982                     type(result.payload))
3983       else:
3984         for item, status in result.payload:
3985           if status not in constants.OOB_STATUSES:
3986             errs.append("health item '%s' has invalid status '%s'" %
3987                         (item, status))
3988
3989     if self.op.command == constants.OOB_POWER_STATUS:
3990       if not isinstance(result.payload, dict):
3991         errs.append("power-status is expected to return a dict but got %s" %
3992                     type(result.payload))
3993
3994     if self.op.command in [
3995         constants.OOB_POWER_ON,
3996         constants.OOB_POWER_OFF,
3997         constants.OOB_POWER_CYCLE,
3998         ]:
3999       if result.payload is not None:
4000         errs.append("%s is expected to not return payload but got '%s'" %
4001                     (self.op.command, result.payload))
4002
4003     if errs:
4004       raise errors.OpExecError("Check of out-of-band payload failed due to %s" %
4005                                utils.CommaJoin(errs))
4006
4007 class _OsQuery(_QueryBase):
4008   FIELDS = query.OS_FIELDS
4009
4010   def ExpandNames(self, lu):
4011     # Lock all nodes in shared mode
4012     # Temporary removal of locks, should be reverted later
4013     # TODO: reintroduce locks when they are lighter-weight
4014     lu.needed_locks = {}
4015     #self.share_locks[locking.LEVEL_NODE] = 1
4016     #self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4017
4018     # The following variables interact with _QueryBase._GetNames
4019     if self.names:
4020       self.wanted = self.names
4021     else:
4022       self.wanted = locking.ALL_SET
4023
4024     self.do_locking = self.use_locking
4025
4026   def DeclareLocks(self, lu, level):
4027     pass
4028
4029   @staticmethod
4030   def _DiagnoseByOS(rlist):
4031     """Remaps a per-node return list into an a per-os per-node dictionary
4032
4033     @param rlist: a map with node names as keys and OS objects as values
4034
4035     @rtype: dict
4036     @return: a dictionary with osnames as keys and as value another
4037         map, with nodes as keys and tuples of (path, status, diagnose,
4038         variants, parameters, api_versions) as values, eg::
4039
4040           {"debian-etch": {"node1": [(/usr/lib/..., True, "", [], []),
4041                                      (/srv/..., False, "invalid api")],
4042                            "node2": [(/srv/..., True, "", [], [])]}
4043           }
4044
4045     """
4046     all_os = {}
4047     # we build here the list of nodes that didn't fail the RPC (at RPC
4048     # level), so that nodes with a non-responding node daemon don't
4049     # make all OSes invalid
4050     good_nodes = [node_name for node_name in rlist
4051                   if not rlist[node_name].fail_msg]
4052     for node_name, nr in rlist.items():
4053       if nr.fail_msg or not nr.payload:
4054         continue
4055       for (name, path, status, diagnose, variants,
4056            params, api_versions) in nr.payload:
4057         if name not in all_os:
4058           # build a list of nodes for this os containing empty lists
4059           # for each node in node_list
4060           all_os[name] = {}
4061           for nname in good_nodes:
4062             all_os[name][nname] = []
4063         # convert params from [name, help] to (name, help)
4064         params = [tuple(v) for v in params]
4065         all_os[name][node_name].append((path, status, diagnose,
4066                                         variants, params, api_versions))
4067     return all_os
4068
4069   def _GetQueryData(self, lu):
4070     """Computes the list of nodes and their attributes.
4071
4072     """
4073     # Locking is not used
4074     assert not (compat.any(lu.glm.is_owned(level)
4075                            for level in locking.LEVELS
4076                            if level != locking.LEVEL_CLUSTER) or
4077                 self.do_locking or self.use_locking)
4078
4079     valid_nodes = [node.name
4080                    for node in lu.cfg.GetAllNodesInfo().values()
4081                    if not node.offline and node.vm_capable]
4082     pol = self._DiagnoseByOS(lu.rpc.call_os_diagnose(valid_nodes))
4083     cluster = lu.cfg.GetClusterInfo()
4084
4085     data = {}
4086
4087     for (os_name, os_data) in pol.items():
4088       info = query.OsInfo(name=os_name, valid=True, node_status=os_data,
4089                           hidden=(os_name in cluster.hidden_os),
4090                           blacklisted=(os_name in cluster.blacklisted_os))
4091
4092       variants = set()
4093       parameters = set()
4094       api_versions = set()
4095
4096       for idx, osl in enumerate(os_data.values()):
4097         info.valid = bool(info.valid and osl and osl[0][1])
4098         if not info.valid:
4099           break
4100
4101         (node_variants, node_params, node_api) = osl[0][3:6]
4102         if idx == 0:
4103           # First entry
4104           variants.update(node_variants)
4105           parameters.update(node_params)
4106           api_versions.update(node_api)
4107         else:
4108           # Filter out inconsistent values
4109           variants.intersection_update(node_variants)
4110           parameters.intersection_update(node_params)
4111           api_versions.intersection_update(node_api)
4112
4113       info.variants = list(variants)
4114       info.parameters = list(parameters)
4115       info.api_versions = list(api_versions)
4116
4117       data[os_name] = info
4118
4119     # Prepare data in requested order
4120     return [data[name] for name in self._GetNames(lu, pol.keys(), None)
4121             if name in data]
4122
4123
4124 class LUOsDiagnose(NoHooksLU):
4125   """Logical unit for OS diagnose/query.
4126
4127   """
4128   REQ_BGL = False
4129
4130   @staticmethod
4131   def _BuildFilter(fields, names):
4132     """Builds a filter for querying OSes.
4133
4134     """
4135     name_filter = qlang.MakeSimpleFilter("name", names)
4136
4137     # Legacy behaviour: Hide hidden, blacklisted or invalid OSes if the
4138     # respective field is not requested
4139     status_filter = [[qlang.OP_NOT, [qlang.OP_TRUE, fname]]
4140                      for fname in ["hidden", "blacklisted"]
4141                      if fname not in fields]
4142     if "valid" not in fields:
4143       status_filter.append([qlang.OP_TRUE, "valid"])
4144
4145     if status_filter:
4146       status_filter.insert(0, qlang.OP_AND)
4147     else:
4148       status_filter = None
4149
4150     if name_filter and status_filter:
4151       return [qlang.OP_AND, name_filter, status_filter]
4152     elif name_filter:
4153       return name_filter
4154     else:
4155       return status_filter
4156
4157   def CheckArguments(self):
4158     self.oq = _OsQuery(self._BuildFilter(self.op.output_fields, self.op.names),
4159                        self.op.output_fields, False)
4160
4161   def ExpandNames(self):
4162     self.oq.ExpandNames(self)
4163
4164   def Exec(self, feedback_fn):
4165     return self.oq.OldStyleQuery(self)
4166
4167
4168 class LUNodeRemove(LogicalUnit):
4169   """Logical unit for removing a node.
4170
4171   """
4172   HPATH = "node-remove"
4173   HTYPE = constants.HTYPE_NODE
4174
4175   def BuildHooksEnv(self):
4176     """Build hooks env.
4177
4178     This doesn't run on the target node in the pre phase as a failed
4179     node would then be impossible to remove.
4180
4181     """
4182     return {
4183       "OP_TARGET": self.op.node_name,
4184       "NODE_NAME": self.op.node_name,
4185       }
4186
4187   def BuildHooksNodes(self):
4188     """Build hooks nodes.
4189
4190     """
4191     all_nodes = self.cfg.GetNodeList()
4192     try:
4193       all_nodes.remove(self.op.node_name)
4194     except ValueError:
4195       logging.warning("Node '%s', which is about to be removed, was not found"
4196                       " in the list of all nodes", self.op.node_name)
4197     return (all_nodes, all_nodes)
4198
4199   def CheckPrereq(self):
4200     """Check prerequisites.
4201
4202     This checks:
4203      - the node exists in the configuration
4204      - it does not have primary or secondary instances
4205      - it's not the master
4206
4207     Any errors are signaled by raising errors.OpPrereqError.
4208
4209     """
4210     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4211     node = self.cfg.GetNodeInfo(self.op.node_name)
4212     assert node is not None
4213
4214     masternode = self.cfg.GetMasterNode()
4215     if node.name == masternode:
4216       raise errors.OpPrereqError("Node is the master node, failover to another"
4217                                  " node is required", errors.ECODE_INVAL)
4218
4219     for instance_name, instance in self.cfg.GetAllInstancesInfo():
4220       if node.name in instance.all_nodes:
4221         raise errors.OpPrereqError("Instance %s is still running on the node,"
4222                                    " please remove first" % instance_name,
4223                                    errors.ECODE_INVAL)
4224     self.op.node_name = node.name
4225     self.node = node
4226
4227   def Exec(self, feedback_fn):
4228     """Removes the node from the cluster.
4229
4230     """
4231     node = self.node
4232     logging.info("Stopping the node daemon and removing configs from node %s",
4233                  node.name)
4234
4235     modify_ssh_setup = self.cfg.GetClusterInfo().modify_ssh_setup
4236
4237     # Promote nodes to master candidate as needed
4238     _AdjustCandidatePool(self, exceptions=[node.name])
4239     self.context.RemoveNode(node.name)
4240
4241     # Run post hooks on the node before it's removed
4242     _RunPostHook(self, node.name)
4243
4244     result = self.rpc.call_node_leave_cluster(node.name, modify_ssh_setup)
4245     msg = result.fail_msg
4246     if msg:
4247       self.LogWarning("Errors encountered on the remote node while leaving"
4248                       " the cluster: %s", msg)
4249
4250     # Remove node from our /etc/hosts
4251     if self.cfg.GetClusterInfo().modify_etc_hosts:
4252       master_node = self.cfg.GetMasterNode()
4253       result = self.rpc.call_etc_hosts_modify(master_node,
4254                                               constants.ETC_HOSTS_REMOVE,
4255                                               node.name, None)
4256       result.Raise("Can't update hosts file with new host data")
4257       _RedistributeAncillaryFiles(self)
4258
4259
4260 class _NodeQuery(_QueryBase):
4261   FIELDS = query.NODE_FIELDS
4262
4263   def ExpandNames(self, lu):
4264     lu.needed_locks = {}
4265     lu.share_locks[locking.LEVEL_NODE] = 1
4266
4267     if self.names:
4268       self.wanted = _GetWantedNodes(lu, self.names)
4269     else:
4270       self.wanted = locking.ALL_SET
4271
4272     self.do_locking = (self.use_locking and
4273                        query.NQ_LIVE in self.requested_data)
4274
4275     if self.do_locking:
4276       # if we don't request only static fields, we need to lock the nodes
4277       lu.needed_locks[locking.LEVEL_NODE] = self.wanted
4278
4279   def DeclareLocks(self, lu, level):
4280     pass
4281
4282   def _GetQueryData(self, lu):
4283     """Computes the list of nodes and their attributes.
4284
4285     """
4286     all_info = lu.cfg.GetAllNodesInfo()
4287
4288     nodenames = self._GetNames(lu, all_info.keys(), locking.LEVEL_NODE)
4289
4290     # Gather data as requested
4291     if query.NQ_LIVE in self.requested_data:
4292       # filter out non-vm_capable nodes
4293       toquery_nodes = [name for name in nodenames if all_info[name].vm_capable]
4294
4295       node_data = lu.rpc.call_node_info(toquery_nodes, lu.cfg.GetVGName(),
4296                                         lu.cfg.GetHypervisorType())
4297       live_data = dict((name, nresult.payload)
4298                        for (name, nresult) in node_data.items()
4299                        if not nresult.fail_msg and nresult.payload)
4300     else:
4301       live_data = None
4302
4303     if query.NQ_INST in self.requested_data:
4304       node_to_primary = dict([(name, set()) for name in nodenames])
4305       node_to_secondary = dict([(name, set()) for name in nodenames])
4306
4307       inst_data = lu.cfg.GetAllInstancesInfo()
4308
4309       for inst in inst_data.values():
4310         if inst.primary_node in node_to_primary:
4311           node_to_primary[inst.primary_node].add(inst.name)
4312         for secnode in inst.secondary_nodes:
4313           if secnode in node_to_secondary:
4314             node_to_secondary[secnode].add(inst.name)
4315     else:
4316       node_to_primary = None
4317       node_to_secondary = None
4318
4319     if query.NQ_OOB in self.requested_data:
4320       oob_support = dict((name, bool(_SupportsOob(lu.cfg, node)))
4321                          for name, node in all_info.iteritems())
4322     else:
4323       oob_support = None
4324
4325     if query.NQ_GROUP in self.requested_data:
4326       groups = lu.cfg.GetAllNodeGroupsInfo()
4327     else:
4328       groups = {}
4329
4330     return query.NodeQueryData([all_info[name] for name in nodenames],
4331                                live_data, lu.cfg.GetMasterNode(),
4332                                node_to_primary, node_to_secondary, groups,
4333                                oob_support, lu.cfg.GetClusterInfo())
4334
4335
4336 class LUNodeQuery(NoHooksLU):
4337   """Logical unit for querying nodes.
4338
4339   """
4340   # pylint: disable-msg=W0142
4341   REQ_BGL = False
4342
4343   def CheckArguments(self):
4344     self.nq = _NodeQuery(qlang.MakeSimpleFilter("name", self.op.names),
4345                          self.op.output_fields, self.op.use_locking)
4346
4347   def ExpandNames(self):
4348     self.nq.ExpandNames(self)
4349
4350   def Exec(self, feedback_fn):
4351     return self.nq.OldStyleQuery(self)
4352
4353
4354 class LUNodeQueryvols(NoHooksLU):
4355   """Logical unit for getting volumes on node(s).
4356
4357   """
4358   REQ_BGL = False
4359   _FIELDS_DYNAMIC = utils.FieldSet("phys", "vg", "name", "size", "instance")
4360   _FIELDS_STATIC = utils.FieldSet("node")
4361
4362   def CheckArguments(self):
4363     _CheckOutputFields(static=self._FIELDS_STATIC,
4364                        dynamic=self._FIELDS_DYNAMIC,
4365                        selected=self.op.output_fields)
4366
4367   def ExpandNames(self):
4368     self.needed_locks = {}
4369     self.share_locks[locking.LEVEL_NODE] = 1
4370     if not self.op.nodes:
4371       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4372     else:
4373       self.needed_locks[locking.LEVEL_NODE] = \
4374         _GetWantedNodes(self, self.op.nodes)
4375
4376   def Exec(self, feedback_fn):
4377     """Computes the list of nodes and their attributes.
4378
4379     """
4380     nodenames = self.glm.list_owned(locking.LEVEL_NODE)
4381     volumes = self.rpc.call_node_volumes(nodenames)
4382
4383     ilist = self.cfg.GetAllInstancesInfo()
4384     vol2inst = _MapInstanceDisksToNodes(ilist.values())
4385
4386     output = []
4387     for node in nodenames:
4388       nresult = volumes[node]
4389       if nresult.offline:
4390         continue
4391       msg = nresult.fail_msg
4392       if msg:
4393         self.LogWarning("Can't compute volume data on node %s: %s", node, msg)
4394         continue
4395
4396       node_vols = sorted(nresult.payload,
4397                          key=operator.itemgetter("dev"))
4398
4399       for vol in node_vols:
4400         node_output = []
4401         for field in self.op.output_fields:
4402           if field == "node":
4403             val = node
4404           elif field == "phys":
4405             val = vol["dev"]
4406           elif field == "vg":
4407             val = vol["vg"]
4408           elif field == "name":
4409             val = vol["name"]
4410           elif field == "size":
4411             val = int(float(vol["size"]))
4412           elif field == "instance":
4413             val = vol2inst.get((node, vol["vg"] + "/" + vol["name"]), "-")
4414           else:
4415             raise errors.ParameterError(field)
4416           node_output.append(str(val))
4417
4418         output.append(node_output)
4419
4420     return output
4421
4422
4423 class LUNodeQueryStorage(NoHooksLU):
4424   """Logical unit for getting information on storage units on node(s).
4425
4426   """
4427   _FIELDS_STATIC = utils.FieldSet(constants.SF_NODE)
4428   REQ_BGL = False
4429
4430   def CheckArguments(self):
4431     _CheckOutputFields(static=self._FIELDS_STATIC,
4432                        dynamic=utils.FieldSet(*constants.VALID_STORAGE_FIELDS),
4433                        selected=self.op.output_fields)
4434
4435   def ExpandNames(self):
4436     self.needed_locks = {}
4437     self.share_locks[locking.LEVEL_NODE] = 1
4438
4439     if self.op.nodes:
4440       self.needed_locks[locking.LEVEL_NODE] = \
4441         _GetWantedNodes(self, self.op.nodes)
4442     else:
4443       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
4444
4445   def Exec(self, feedback_fn):
4446     """Computes the list of nodes and their attributes.
4447
4448     """
4449     self.nodes = self.glm.list_owned(locking.LEVEL_NODE)
4450
4451     # Always get name to sort by
4452     if constants.SF_NAME in self.op.output_fields:
4453       fields = self.op.output_fields[:]
4454     else:
4455       fields = [constants.SF_NAME] + self.op.output_fields
4456
4457     # Never ask for node or type as it's only known to the LU
4458     for extra in [constants.SF_NODE, constants.SF_TYPE]:
4459       while extra in fields:
4460         fields.remove(extra)
4461
4462     field_idx = dict([(name, idx) for (idx, name) in enumerate(fields)])
4463     name_idx = field_idx[constants.SF_NAME]
4464
4465     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4466     data = self.rpc.call_storage_list(self.nodes,
4467                                       self.op.storage_type, st_args,
4468                                       self.op.name, fields)
4469
4470     result = []
4471
4472     for node in utils.NiceSort(self.nodes):
4473       nresult = data[node]
4474       if nresult.offline:
4475         continue
4476
4477       msg = nresult.fail_msg
4478       if msg:
4479         self.LogWarning("Can't get storage data from node %s: %s", node, msg)
4480         continue
4481
4482       rows = dict([(row[name_idx], row) for row in nresult.payload])
4483
4484       for name in utils.NiceSort(rows.keys()):
4485         row = rows[name]
4486
4487         out = []
4488
4489         for field in self.op.output_fields:
4490           if field == constants.SF_NODE:
4491             val = node
4492           elif field == constants.SF_TYPE:
4493             val = self.op.storage_type
4494           elif field in field_idx:
4495             val = row[field_idx[field]]
4496           else:
4497             raise errors.ParameterError(field)
4498
4499           out.append(val)
4500
4501         result.append(out)
4502
4503     return result
4504
4505
4506 class _InstanceQuery(_QueryBase):
4507   FIELDS = query.INSTANCE_FIELDS
4508
4509   def ExpandNames(self, lu):
4510     lu.needed_locks = {}
4511     lu.share_locks = _ShareAll()
4512
4513     if self.names:
4514       self.wanted = _GetWantedInstances(lu, self.names)
4515     else:
4516       self.wanted = locking.ALL_SET
4517
4518     self.do_locking = (self.use_locking and
4519                        query.IQ_LIVE in self.requested_data)
4520     if self.do_locking:
4521       lu.needed_locks[locking.LEVEL_INSTANCE] = self.wanted
4522       lu.needed_locks[locking.LEVEL_NODEGROUP] = []
4523       lu.needed_locks[locking.LEVEL_NODE] = []
4524       lu.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
4525
4526     self.do_grouplocks = (self.do_locking and
4527                           query.IQ_NODES in self.requested_data)
4528
4529   def DeclareLocks(self, lu, level):
4530     if self.do_locking:
4531       if level == locking.LEVEL_NODEGROUP and self.do_grouplocks:
4532         assert not lu.needed_locks[locking.LEVEL_NODEGROUP]
4533
4534         # Lock all groups used by instances optimistically; this requires going
4535         # via the node before it's locked, requiring verification later on
4536         lu.needed_locks[locking.LEVEL_NODEGROUP] = \
4537           set(group_uuid
4538               for instance_name in
4539                 lu.glm.list_owned(locking.LEVEL_INSTANCE)
4540               for group_uuid in
4541                 lu.cfg.GetInstanceNodeGroups(instance_name))
4542       elif level == locking.LEVEL_NODE:
4543         lu._LockInstancesNodes() # pylint: disable-msg=W0212
4544
4545   @staticmethod
4546   def _CheckGroupLocks(lu):
4547     owned_instances = frozenset(lu.glm.list_owned(locking.LEVEL_INSTANCE))
4548     owned_groups = frozenset(lu.glm.list_owned(locking.LEVEL_NODEGROUP))
4549
4550     # Check if node groups for locked instances are still correct
4551     for instance_name in owned_instances:
4552       inst_groups = lu.cfg.GetInstanceNodeGroups(instance_name)
4553       if not owned_groups.issuperset(inst_groups):
4554         raise errors.OpPrereqError("Instance %s's node groups changed since"
4555                                    " locks were acquired, current groups are"
4556                                    " are '%s', owning groups '%s'; retry the"
4557                                    " operation" %
4558                                    (instance_name,
4559                                     utils.CommaJoin(inst_groups),
4560                                     utils.CommaJoin(owned_groups)),
4561                                    errors.ECODE_STATE)
4562
4563   def _GetQueryData(self, lu):
4564     """Computes the list of instances and their attributes.
4565
4566     """
4567     if self.do_grouplocks:
4568       self._CheckGroupLocks(lu)
4569
4570     cluster = lu.cfg.GetClusterInfo()
4571     all_info = lu.cfg.GetAllInstancesInfo()
4572
4573     instance_names = self._GetNames(lu, all_info.keys(), locking.LEVEL_INSTANCE)
4574
4575     instance_list = [all_info[name] for name in instance_names]
4576     nodes = frozenset(itertools.chain(*(inst.all_nodes
4577                                         for inst in instance_list)))
4578     hv_list = list(set([inst.hypervisor for inst in instance_list]))
4579     bad_nodes = []
4580     offline_nodes = []
4581     wrongnode_inst = set()
4582
4583     # Gather data as requested
4584     if self.requested_data & set([query.IQ_LIVE, query.IQ_CONSOLE]):
4585       live_data = {}
4586       node_data = lu.rpc.call_all_instances_info(nodes, hv_list)
4587       for name in nodes:
4588         result = node_data[name]
4589         if result.offline:
4590           # offline nodes will be in both lists
4591           assert result.fail_msg
4592           offline_nodes.append(name)
4593         if result.fail_msg:
4594           bad_nodes.append(name)
4595         elif result.payload:
4596           for inst in result.payload:
4597             if inst in all_info:
4598               if all_info[inst].primary_node == name:
4599                 live_data.update(result.payload)
4600               else:
4601                 wrongnode_inst.add(inst)
4602             else:
4603               # orphan instance; we don't list it here as we don't
4604               # handle this case yet in the output of instance listing
4605               logging.warning("Orphan instance '%s' found on node %s",
4606                               inst, name)
4607         # else no instance is alive
4608     else:
4609       live_data = {}
4610
4611     if query.IQ_DISKUSAGE in self.requested_data:
4612       disk_usage = dict((inst.name,
4613                          _ComputeDiskSize(inst.disk_template,
4614                                           [{constants.IDISK_SIZE: disk.size}
4615                                            for disk in inst.disks]))
4616                         for inst in instance_list)
4617     else:
4618       disk_usage = None
4619
4620     if query.IQ_CONSOLE in self.requested_data:
4621       consinfo = {}
4622       for inst in instance_list:
4623         if inst.name in live_data:
4624           # Instance is running
4625           consinfo[inst.name] = _GetInstanceConsole(cluster, inst)
4626         else:
4627           consinfo[inst.name] = None
4628       assert set(consinfo.keys()) == set(instance_names)
4629     else:
4630       consinfo = None
4631
4632     if query.IQ_NODES in self.requested_data:
4633       node_names = set(itertools.chain(*map(operator.attrgetter("all_nodes"),
4634                                             instance_list)))
4635       nodes = dict(lu.cfg.GetMultiNodeInfo(node_names))
4636       groups = dict((uuid, lu.cfg.GetNodeGroup(uuid))
4637                     for uuid in set(map(operator.attrgetter("group"),
4638                                         nodes.values())))
4639     else:
4640       nodes = None
4641       groups = None
4642
4643     return query.InstanceQueryData(instance_list, lu.cfg.GetClusterInfo(),
4644                                    disk_usage, offline_nodes, bad_nodes,
4645                                    live_data, wrongnode_inst, consinfo,
4646                                    nodes, groups)
4647
4648
4649 class LUQuery(NoHooksLU):
4650   """Query for resources/items of a certain kind.
4651
4652   """
4653   # pylint: disable-msg=W0142
4654   REQ_BGL = False
4655
4656   def CheckArguments(self):
4657     qcls = _GetQueryImplementation(self.op.what)
4658
4659     self.impl = qcls(self.op.filter, self.op.fields, False)
4660
4661   def ExpandNames(self):
4662     self.impl.ExpandNames(self)
4663
4664   def DeclareLocks(self, level):
4665     self.impl.DeclareLocks(self, level)
4666
4667   def Exec(self, feedback_fn):
4668     return self.impl.NewStyleQuery(self)
4669
4670
4671 class LUQueryFields(NoHooksLU):
4672   """Query for resources/items of a certain kind.
4673
4674   """
4675   # pylint: disable-msg=W0142
4676   REQ_BGL = False
4677
4678   def CheckArguments(self):
4679     self.qcls = _GetQueryImplementation(self.op.what)
4680
4681   def ExpandNames(self):
4682     self.needed_locks = {}
4683
4684   def Exec(self, feedback_fn):
4685     return query.QueryFields(self.qcls.FIELDS, self.op.fields)
4686
4687
4688 class LUNodeModifyStorage(NoHooksLU):
4689   """Logical unit for modifying a storage volume on a node.
4690
4691   """
4692   REQ_BGL = False
4693
4694   def CheckArguments(self):
4695     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
4696
4697     storage_type = self.op.storage_type
4698
4699     try:
4700       modifiable = constants.MODIFIABLE_STORAGE_FIELDS[storage_type]
4701     except KeyError:
4702       raise errors.OpPrereqError("Storage units of type '%s' can not be"
4703                                  " modified" % storage_type,
4704                                  errors.ECODE_INVAL)
4705
4706     diff = set(self.op.changes.keys()) - modifiable
4707     if diff:
4708       raise errors.OpPrereqError("The following fields can not be modified for"
4709                                  " storage units of type '%s': %r" %
4710                                  (storage_type, list(diff)),
4711                                  errors.ECODE_INVAL)
4712
4713   def ExpandNames(self):
4714     self.needed_locks = {
4715       locking.LEVEL_NODE: self.op.node_name,
4716       }
4717
4718   def Exec(self, feedback_fn):
4719     """Computes the list of nodes and their attributes.
4720
4721     """
4722     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
4723     result = self.rpc.call_storage_modify(self.op.node_name,
4724                                           self.op.storage_type, st_args,
4725                                           self.op.name, self.op.changes)
4726     result.Raise("Failed to modify storage unit '%s' on %s" %
4727                  (self.op.name, self.op.node_name))
4728
4729
4730 class LUNodeAdd(LogicalUnit):
4731   """Logical unit for adding node to the cluster.
4732
4733   """
4734   HPATH = "node-add"
4735   HTYPE = constants.HTYPE_NODE
4736   _NFLAGS = ["master_capable", "vm_capable"]
4737
4738   def CheckArguments(self):
4739     self.primary_ip_family = self.cfg.GetPrimaryIPFamily()
4740     # validate/normalize the node name
4741     self.hostname = netutils.GetHostname(name=self.op.node_name,
4742                                          family=self.primary_ip_family)
4743     self.op.node_name = self.hostname.name
4744
4745     if self.op.readd and self.op.node_name == self.cfg.GetMasterNode():
4746       raise errors.OpPrereqError("Cannot readd the master node",
4747                                  errors.ECODE_STATE)
4748
4749     if self.op.readd and self.op.group:
4750       raise errors.OpPrereqError("Cannot pass a node group when a node is"
4751                                  " being readded", errors.ECODE_INVAL)
4752
4753   def BuildHooksEnv(self):
4754     """Build hooks env.
4755
4756     This will run on all nodes before, and on all nodes + the new node after.
4757
4758     """
4759     return {
4760       "OP_TARGET": self.op.node_name,
4761       "NODE_NAME": self.op.node_name,
4762       "NODE_PIP": self.op.primary_ip,
4763       "NODE_SIP": self.op.secondary_ip,
4764       "MASTER_CAPABLE": str(self.op.master_capable),
4765       "VM_CAPABLE": str(self.op.vm_capable),
4766       }
4767
4768   def BuildHooksNodes(self):
4769     """Build hooks nodes.
4770
4771     """
4772     # Exclude added node
4773     pre_nodes = list(set(self.cfg.GetNodeList()) - set([self.op.node_name]))
4774     post_nodes = pre_nodes + [self.op.node_name, ]
4775
4776     return (pre_nodes, post_nodes)
4777
4778   def CheckPrereq(self):
4779     """Check prerequisites.
4780
4781     This checks:
4782      - the new node is not already in the config
4783      - it is resolvable
4784      - its parameters (single/dual homed) matches the cluster
4785
4786     Any errors are signaled by raising errors.OpPrereqError.
4787
4788     """
4789     cfg = self.cfg
4790     hostname = self.hostname
4791     node = hostname.name
4792     primary_ip = self.op.primary_ip = hostname.ip
4793     if self.op.secondary_ip is None:
4794       if self.primary_ip_family == netutils.IP6Address.family:
4795         raise errors.OpPrereqError("When using a IPv6 primary address, a valid"
4796                                    " IPv4 address must be given as secondary",
4797                                    errors.ECODE_INVAL)
4798       self.op.secondary_ip = primary_ip
4799
4800     secondary_ip = self.op.secondary_ip
4801     if not netutils.IP4Address.IsValid(secondary_ip):
4802       raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
4803                                  " address" % secondary_ip, errors.ECODE_INVAL)
4804
4805     node_list = cfg.GetNodeList()
4806     if not self.op.readd and node in node_list:
4807       raise errors.OpPrereqError("Node %s is already in the configuration" %
4808                                  node, errors.ECODE_EXISTS)
4809     elif self.op.readd and node not in node_list:
4810       raise errors.OpPrereqError("Node %s is not in the configuration" % node,
4811                                  errors.ECODE_NOENT)
4812
4813     self.changed_primary_ip = False
4814
4815     for existing_node_name, existing_node in cfg.GetMultiNodeInfo(node_list):
4816       if self.op.readd and node == existing_node_name:
4817         if existing_node.secondary_ip != secondary_ip:
4818           raise errors.OpPrereqError("Readded node doesn't have the same IP"
4819                                      " address configuration as before",
4820                                      errors.ECODE_INVAL)
4821         if existing_node.primary_ip != primary_ip:
4822           self.changed_primary_ip = True
4823
4824         continue
4825
4826       if (existing_node.primary_ip == primary_ip or
4827           existing_node.secondary_ip == primary_ip or
4828           existing_node.primary_ip == secondary_ip or
4829           existing_node.secondary_ip == secondary_ip):
4830         raise errors.OpPrereqError("New node ip address(es) conflict with"
4831                                    " existing node %s" % existing_node.name,
4832                                    errors.ECODE_NOTUNIQUE)
4833
4834     # After this 'if' block, None is no longer a valid value for the
4835     # _capable op attributes
4836     if self.op.readd:
4837       old_node = self.cfg.GetNodeInfo(node)
4838       assert old_node is not None, "Can't retrieve locked node %s" % node
4839       for attr in self._NFLAGS:
4840         if getattr(self.op, attr) is None:
4841           setattr(self.op, attr, getattr(old_node, attr))
4842     else:
4843       for attr in self._NFLAGS:
4844         if getattr(self.op, attr) is None:
4845           setattr(self.op, attr, True)
4846
4847     if self.op.readd and not self.op.vm_capable:
4848       pri, sec = cfg.GetNodeInstances(node)
4849       if pri or sec:
4850         raise errors.OpPrereqError("Node %s being re-added with vm_capable"
4851                                    " flag set to false, but it already holds"
4852                                    " instances" % node,
4853                                    errors.ECODE_STATE)
4854
4855     # check that the type of the node (single versus dual homed) is the
4856     # same as for the master
4857     myself = cfg.GetNodeInfo(self.cfg.GetMasterNode())
4858     master_singlehomed = myself.secondary_ip == myself.primary_ip
4859     newbie_singlehomed = secondary_ip == primary_ip
4860     if master_singlehomed != newbie_singlehomed:
4861       if master_singlehomed:
4862         raise errors.OpPrereqError("The master has no secondary ip but the"
4863                                    " new node has one",
4864                                    errors.ECODE_INVAL)
4865       else:
4866         raise errors.OpPrereqError("The master has a secondary ip but the"
4867                                    " new node doesn't have one",
4868                                    errors.ECODE_INVAL)
4869
4870     # checks reachability
4871     if not netutils.TcpPing(primary_ip, constants.DEFAULT_NODED_PORT):
4872       raise errors.OpPrereqError("Node not reachable by ping",
4873                                  errors.ECODE_ENVIRON)
4874
4875     if not newbie_singlehomed:
4876       # check reachability from my secondary ip to newbie's secondary ip
4877       if not netutils.TcpPing(secondary_ip, constants.DEFAULT_NODED_PORT,
4878                            source=myself.secondary_ip):
4879         raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
4880                                    " based ping to node daemon port",
4881                                    errors.ECODE_ENVIRON)
4882
4883     if self.op.readd:
4884       exceptions = [node]
4885     else:
4886       exceptions = []
4887
4888     if self.op.master_capable:
4889       self.master_candidate = _DecideSelfPromotion(self, exceptions=exceptions)
4890     else:
4891       self.master_candidate = False
4892
4893     if self.op.readd:
4894       self.new_node = old_node
4895     else:
4896       node_group = cfg.LookupNodeGroup(self.op.group)
4897       self.new_node = objects.Node(name=node,
4898                                    primary_ip=primary_ip,
4899                                    secondary_ip=secondary_ip,
4900                                    master_candidate=self.master_candidate,
4901                                    offline=False, drained=False,
4902                                    group=node_group)
4903
4904     if self.op.ndparams:
4905       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
4906
4907   def Exec(self, feedback_fn):
4908     """Adds the new node to the cluster.
4909
4910     """
4911     new_node = self.new_node
4912     node = new_node.name
4913
4914     # We adding a new node so we assume it's powered
4915     new_node.powered = True
4916
4917     # for re-adds, reset the offline/drained/master-candidate flags;
4918     # we need to reset here, otherwise offline would prevent RPC calls
4919     # later in the procedure; this also means that if the re-add
4920     # fails, we are left with a non-offlined, broken node
4921     if self.op.readd:
4922       new_node.drained = new_node.offline = False # pylint: disable-msg=W0201
4923       self.LogInfo("Readding a node, the offline/drained flags were reset")
4924       # if we demote the node, we do cleanup later in the procedure
4925       new_node.master_candidate = self.master_candidate
4926       if self.changed_primary_ip:
4927         new_node.primary_ip = self.op.primary_ip
4928
4929     # copy the master/vm_capable flags
4930     for attr in self._NFLAGS:
4931       setattr(new_node, attr, getattr(self.op, attr))
4932
4933     # notify the user about any possible mc promotion
4934     if new_node.master_candidate:
4935       self.LogInfo("Node will be a master candidate")
4936
4937     if self.op.ndparams:
4938       new_node.ndparams = self.op.ndparams
4939     else:
4940       new_node.ndparams = {}
4941
4942     # check connectivity
4943     result = self.rpc.call_version([node])[node]
4944     result.Raise("Can't get version information from node %s" % node)
4945     if constants.PROTOCOL_VERSION == result.payload:
4946       logging.info("Communication to node %s fine, sw version %s match",
4947                    node, result.payload)
4948     else:
4949       raise errors.OpExecError("Version mismatch master version %s,"
4950                                " node version %s" %
4951                                (constants.PROTOCOL_VERSION, result.payload))
4952
4953     # Add node to our /etc/hosts, and add key to known_hosts
4954     if self.cfg.GetClusterInfo().modify_etc_hosts:
4955       master_node = self.cfg.GetMasterNode()
4956       result = self.rpc.call_etc_hosts_modify(master_node,
4957                                               constants.ETC_HOSTS_ADD,
4958                                               self.hostname.name,
4959                                               self.hostname.ip)
4960       result.Raise("Can't update hosts file with new host data")
4961
4962     if new_node.secondary_ip != new_node.primary_ip:
4963       _CheckNodeHasSecondaryIP(self, new_node.name, new_node.secondary_ip,
4964                                False)
4965
4966     node_verify_list = [self.cfg.GetMasterNode()]
4967     node_verify_param = {
4968       constants.NV_NODELIST: [node],
4969       # TODO: do a node-net-test as well?
4970     }
4971
4972     result = self.rpc.call_node_verify(node_verify_list, node_verify_param,
4973                                        self.cfg.GetClusterName())
4974     for verifier in node_verify_list:
4975       result[verifier].Raise("Cannot communicate with node %s" % verifier)
4976       nl_payload = result[verifier].payload[constants.NV_NODELIST]
4977       if nl_payload:
4978         for failed in nl_payload:
4979           feedback_fn("ssh/hostname verification failed"
4980                       " (checking from %s): %s" %
4981                       (verifier, nl_payload[failed]))
4982         raise errors.OpExecError("ssh/hostname verification failed")
4983
4984     if self.op.readd:
4985       _RedistributeAncillaryFiles(self)
4986       self.context.ReaddNode(new_node)
4987       # make sure we redistribute the config
4988       self.cfg.Update(new_node, feedback_fn)
4989       # and make sure the new node will not have old files around
4990       if not new_node.master_candidate:
4991         result = self.rpc.call_node_demote_from_mc(new_node.name)
4992         msg = result.fail_msg
4993         if msg:
4994           self.LogWarning("Node failed to demote itself from master"
4995                           " candidate status: %s" % msg)
4996     else:
4997       _RedistributeAncillaryFiles(self, additional_nodes=[node],
4998                                   additional_vm=self.op.vm_capable)
4999       self.context.AddNode(new_node, self.proc.GetECId())
5000
5001
5002 class LUNodeSetParams(LogicalUnit):
5003   """Modifies the parameters of a node.
5004
5005   @cvar _F2R: a dictionary from tuples of flags (mc, drained, offline)
5006       to the node role (as _ROLE_*)
5007   @cvar _R2F: a dictionary from node role to tuples of flags
5008   @cvar _FLAGS: a list of attribute names corresponding to the flags
5009
5010   """
5011   HPATH = "node-modify"
5012   HTYPE = constants.HTYPE_NODE
5013   REQ_BGL = False
5014   (_ROLE_CANDIDATE, _ROLE_DRAINED, _ROLE_OFFLINE, _ROLE_REGULAR) = range(4)
5015   _F2R = {
5016     (True, False, False): _ROLE_CANDIDATE,
5017     (False, True, False): _ROLE_DRAINED,
5018     (False, False, True): _ROLE_OFFLINE,
5019     (False, False, False): _ROLE_REGULAR,
5020     }
5021   _R2F = dict((v, k) for k, v in _F2R.items())
5022   _FLAGS = ["master_candidate", "drained", "offline"]
5023
5024   def CheckArguments(self):
5025     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5026     all_mods = [self.op.offline, self.op.master_candidate, self.op.drained,
5027                 self.op.master_capable, self.op.vm_capable,
5028                 self.op.secondary_ip, self.op.ndparams]
5029     if all_mods.count(None) == len(all_mods):
5030       raise errors.OpPrereqError("Please pass at least one modification",
5031                                  errors.ECODE_INVAL)
5032     if all_mods.count(True) > 1:
5033       raise errors.OpPrereqError("Can't set the node into more than one"
5034                                  " state at the same time",
5035                                  errors.ECODE_INVAL)
5036
5037     # Boolean value that tells us whether we might be demoting from MC
5038     self.might_demote = (self.op.master_candidate == False or
5039                          self.op.offline == True or
5040                          self.op.drained == True or
5041                          self.op.master_capable == False)
5042
5043     if self.op.secondary_ip:
5044       if not netutils.IP4Address.IsValid(self.op.secondary_ip):
5045         raise errors.OpPrereqError("Secondary IP (%s) needs to be a valid IPv4"
5046                                    " address" % self.op.secondary_ip,
5047                                    errors.ECODE_INVAL)
5048
5049     self.lock_all = self.op.auto_promote and self.might_demote
5050     self.lock_instances = self.op.secondary_ip is not None
5051
5052   def ExpandNames(self):
5053     if self.lock_all:
5054       self.needed_locks = {locking.LEVEL_NODE: locking.ALL_SET}
5055     else:
5056       self.needed_locks = {locking.LEVEL_NODE: self.op.node_name}
5057
5058     if self.lock_instances:
5059       self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
5060
5061   def DeclareLocks(self, level):
5062     # If we have locked all instances, before waiting to lock nodes, release
5063     # all the ones living on nodes unrelated to the current operation.
5064     if level == locking.LEVEL_NODE and self.lock_instances:
5065       self.affected_instances = []
5066       if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
5067         instances_keep = []
5068
5069         # Build list of instances to release
5070         locked_i = self.glm.list_owned(locking.LEVEL_INSTANCE)
5071         for instance_name, instance in self.cfg.GetMultiInstanceInfo(locked_i):
5072           if (instance.disk_template in constants.DTS_INT_MIRROR and
5073               self.op.node_name in instance.all_nodes):
5074             instances_keep.append(instance_name)
5075             self.affected_instances.append(instance)
5076
5077         _ReleaseLocks(self, locking.LEVEL_INSTANCE, keep=instances_keep)
5078
5079         assert (set(self.glm.list_owned(locking.LEVEL_INSTANCE)) ==
5080                 set(instances_keep))
5081
5082   def BuildHooksEnv(self):
5083     """Build hooks env.
5084
5085     This runs on the master node.
5086
5087     """
5088     return {
5089       "OP_TARGET": self.op.node_name,
5090       "MASTER_CANDIDATE": str(self.op.master_candidate),
5091       "OFFLINE": str(self.op.offline),
5092       "DRAINED": str(self.op.drained),
5093       "MASTER_CAPABLE": str(self.op.master_capable),
5094       "VM_CAPABLE": str(self.op.vm_capable),
5095       }
5096
5097   def BuildHooksNodes(self):
5098     """Build hooks nodes.
5099
5100     """
5101     nl = [self.cfg.GetMasterNode(), self.op.node_name]
5102     return (nl, nl)
5103
5104   def CheckPrereq(self):
5105     """Check prerequisites.
5106
5107     This only checks the instance list against the existing names.
5108
5109     """
5110     node = self.node = self.cfg.GetNodeInfo(self.op.node_name)
5111
5112     if (self.op.master_candidate is not None or
5113         self.op.drained is not None or
5114         self.op.offline is not None):
5115       # we can't change the master's node flags
5116       if self.op.node_name == self.cfg.GetMasterNode():
5117         raise errors.OpPrereqError("The master role can be changed"
5118                                    " only via master-failover",
5119                                    errors.ECODE_INVAL)
5120
5121     if self.op.master_candidate and not node.master_capable:
5122       raise errors.OpPrereqError("Node %s is not master capable, cannot make"
5123                                  " it a master candidate" % node.name,
5124                                  errors.ECODE_STATE)
5125
5126     if self.op.vm_capable == False:
5127       (ipri, isec) = self.cfg.GetNodeInstances(self.op.node_name)
5128       if ipri or isec:
5129         raise errors.OpPrereqError("Node %s hosts instances, cannot unset"
5130                                    " the vm_capable flag" % node.name,
5131                                    errors.ECODE_STATE)
5132
5133     if node.master_candidate and self.might_demote and not self.lock_all:
5134       assert not self.op.auto_promote, "auto_promote set but lock_all not"
5135       # check if after removing the current node, we're missing master
5136       # candidates
5137       (mc_remaining, mc_should, _) = \
5138           self.cfg.GetMasterCandidateStats(exceptions=[node.name])
5139       if mc_remaining < mc_should:
5140         raise errors.OpPrereqError("Not enough master candidates, please"
5141                                    " pass auto promote option to allow"
5142                                    " promotion", errors.ECODE_STATE)
5143
5144     self.old_flags = old_flags = (node.master_candidate,
5145                                   node.drained, node.offline)
5146     assert old_flags in self._F2R, "Un-handled old flags %s" % str(old_flags)
5147     self.old_role = old_role = self._F2R[old_flags]
5148
5149     # Check for ineffective changes
5150     for attr in self._FLAGS:
5151       if (getattr(self.op, attr) == False and getattr(node, attr) == False):
5152         self.LogInfo("Ignoring request to unset flag %s, already unset", attr)
5153         setattr(self.op, attr, None)
5154
5155     # Past this point, any flag change to False means a transition
5156     # away from the respective state, as only real changes are kept
5157
5158     # TODO: We might query the real power state if it supports OOB
5159     if _SupportsOob(self.cfg, node):
5160       if self.op.offline is False and not (node.powered or
5161                                            self.op.powered == True):
5162         raise errors.OpPrereqError(("Node %s needs to be turned on before its"
5163                                     " offline status can be reset") %
5164                                    self.op.node_name)
5165     elif self.op.powered is not None:
5166       raise errors.OpPrereqError(("Unable to change powered state for node %s"
5167                                   " as it does not support out-of-band"
5168                                   " handling") % self.op.node_name)
5169
5170     # If we're being deofflined/drained, we'll MC ourself if needed
5171     if (self.op.drained == False or self.op.offline == False or
5172         (self.op.master_capable and not node.master_capable)):
5173       if _DecideSelfPromotion(self):
5174         self.op.master_candidate = True
5175         self.LogInfo("Auto-promoting node to master candidate")
5176
5177     # If we're no longer master capable, we'll demote ourselves from MC
5178     if self.op.master_capable == False and node.master_candidate:
5179       self.LogInfo("Demoting from master candidate")
5180       self.op.master_candidate = False
5181
5182     # Compute new role
5183     assert [getattr(self.op, attr) for attr in self._FLAGS].count(True) <= 1
5184     if self.op.master_candidate:
5185       new_role = self._ROLE_CANDIDATE
5186     elif self.op.drained:
5187       new_role = self._ROLE_DRAINED
5188     elif self.op.offline:
5189       new_role = self._ROLE_OFFLINE
5190     elif False in [self.op.master_candidate, self.op.drained, self.op.offline]:
5191       # False is still in new flags, which means we're un-setting (the
5192       # only) True flag
5193       new_role = self._ROLE_REGULAR
5194     else: # no new flags, nothing, keep old role
5195       new_role = old_role
5196
5197     self.new_role = new_role
5198
5199     if old_role == self._ROLE_OFFLINE and new_role != old_role:
5200       # Trying to transition out of offline status
5201       result = self.rpc.call_version([node.name])[node.name]
5202       if result.fail_msg:
5203         raise errors.OpPrereqError("Node %s is being de-offlined but fails"
5204                                    " to report its version: %s" %
5205                                    (node.name, result.fail_msg),
5206                                    errors.ECODE_STATE)
5207       else:
5208         self.LogWarning("Transitioning node from offline to online state"
5209                         " without using re-add. Please make sure the node"
5210                         " is healthy!")
5211
5212     if self.op.secondary_ip:
5213       # Ok even without locking, because this can't be changed by any LU
5214       master = self.cfg.GetNodeInfo(self.cfg.GetMasterNode())
5215       master_singlehomed = master.secondary_ip == master.primary_ip
5216       if master_singlehomed and self.op.secondary_ip:
5217         raise errors.OpPrereqError("Cannot change the secondary ip on a single"
5218                                    " homed cluster", errors.ECODE_INVAL)
5219
5220       if node.offline:
5221         if self.affected_instances:
5222           raise errors.OpPrereqError("Cannot change secondary ip: offline"
5223                                      " node has instances (%s) configured"
5224                                      " to use it" % self.affected_instances)
5225       else:
5226         # On online nodes, check that no instances are running, and that
5227         # the node has the new ip and we can reach it.
5228         for instance in self.affected_instances:
5229           _CheckInstanceDown(self, instance, "cannot change secondary ip")
5230
5231         _CheckNodeHasSecondaryIP(self, node.name, self.op.secondary_ip, True)
5232         if master.name != node.name:
5233           # check reachability from master secondary ip to new secondary ip
5234           if not netutils.TcpPing(self.op.secondary_ip,
5235                                   constants.DEFAULT_NODED_PORT,
5236                                   source=master.secondary_ip):
5237             raise errors.OpPrereqError("Node secondary ip not reachable by TCP"
5238                                        " based ping to node daemon port",
5239                                        errors.ECODE_ENVIRON)
5240
5241     if self.op.ndparams:
5242       new_ndparams = _GetUpdatedParams(self.node.ndparams, self.op.ndparams)
5243       utils.ForceDictType(new_ndparams, constants.NDS_PARAMETER_TYPES)
5244       self.new_ndparams = new_ndparams
5245
5246   def Exec(self, feedback_fn):
5247     """Modifies a node.
5248
5249     """
5250     node = self.node
5251     old_role = self.old_role
5252     new_role = self.new_role
5253
5254     result = []
5255
5256     if self.op.ndparams:
5257       node.ndparams = self.new_ndparams
5258
5259     if self.op.powered is not None:
5260       node.powered = self.op.powered
5261
5262     for attr in ["master_capable", "vm_capable"]:
5263       val = getattr(self.op, attr)
5264       if val is not None:
5265         setattr(node, attr, val)
5266         result.append((attr, str(val)))
5267
5268     if new_role != old_role:
5269       # Tell the node to demote itself, if no longer MC and not offline
5270       if old_role == self._ROLE_CANDIDATE and new_role != self._ROLE_OFFLINE:
5271         msg = self.rpc.call_node_demote_from_mc(node.name).fail_msg
5272         if msg:
5273           self.LogWarning("Node failed to demote itself: %s", msg)
5274
5275       new_flags = self._R2F[new_role]
5276       for of, nf, desc in zip(self.old_flags, new_flags, self._FLAGS):
5277         if of != nf:
5278           result.append((desc, str(nf)))
5279       (node.master_candidate, node.drained, node.offline) = new_flags
5280
5281       # we locked all nodes, we adjust the CP before updating this node
5282       if self.lock_all:
5283         _AdjustCandidatePool(self, [node.name])
5284
5285     if self.op.secondary_ip:
5286       node.secondary_ip = self.op.secondary_ip
5287       result.append(("secondary_ip", self.op.secondary_ip))
5288
5289     # this will trigger configuration file update, if needed
5290     self.cfg.Update(node, feedback_fn)
5291
5292     # this will trigger job queue propagation or cleanup if the mc
5293     # flag changed
5294     if [old_role, new_role].count(self._ROLE_CANDIDATE) == 1:
5295       self.context.ReaddNode(node)
5296
5297     return result
5298
5299
5300 class LUNodePowercycle(NoHooksLU):
5301   """Powercycles a node.
5302
5303   """
5304   REQ_BGL = False
5305
5306   def CheckArguments(self):
5307     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
5308     if self.op.node_name == self.cfg.GetMasterNode() and not self.op.force:
5309       raise errors.OpPrereqError("The node is the master and the force"
5310                                  " parameter was not set",
5311                                  errors.ECODE_INVAL)
5312
5313   def ExpandNames(self):
5314     """Locking for PowercycleNode.
5315
5316     This is a last-resort option and shouldn't block on other
5317     jobs. Therefore, we grab no locks.
5318
5319     """
5320     self.needed_locks = {}
5321
5322   def Exec(self, feedback_fn):
5323     """Reboots a node.
5324
5325     """
5326     result = self.rpc.call_node_powercycle(self.op.node_name,
5327                                            self.cfg.GetHypervisorType())
5328     result.Raise("Failed to schedule the reboot")
5329     return result.payload
5330
5331
5332 class LUClusterQuery(NoHooksLU):
5333   """Query cluster configuration.
5334
5335   """
5336   REQ_BGL = False
5337
5338   def ExpandNames(self):
5339     self.needed_locks = {}
5340
5341   def Exec(self, feedback_fn):
5342     """Return cluster config.
5343
5344     """
5345     cluster = self.cfg.GetClusterInfo()
5346     os_hvp = {}
5347
5348     # Filter just for enabled hypervisors
5349     for os_name, hv_dict in cluster.os_hvp.items():
5350       os_hvp[os_name] = {}
5351       for hv_name, hv_params in hv_dict.items():
5352         if hv_name in cluster.enabled_hypervisors:
5353           os_hvp[os_name][hv_name] = hv_params
5354
5355     # Convert ip_family to ip_version
5356     primary_ip_version = constants.IP4_VERSION
5357     if cluster.primary_ip_family == netutils.IP6Address.family:
5358       primary_ip_version = constants.IP6_VERSION
5359
5360     result = {
5361       "software_version": constants.RELEASE_VERSION,
5362       "protocol_version": constants.PROTOCOL_VERSION,
5363       "config_version": constants.CONFIG_VERSION,
5364       "os_api_version": max(constants.OS_API_VERSIONS),
5365       "export_version": constants.EXPORT_VERSION,
5366       "architecture": (platform.architecture()[0], platform.machine()),
5367       "name": cluster.cluster_name,
5368       "master": cluster.master_node,
5369       "default_hypervisor": cluster.enabled_hypervisors[0],
5370       "enabled_hypervisors": cluster.enabled_hypervisors,
5371       "hvparams": dict([(hypervisor_name, cluster.hvparams[hypervisor_name])
5372                         for hypervisor_name in cluster.enabled_hypervisors]),
5373       "os_hvp": os_hvp,
5374       "beparams": cluster.beparams,
5375       "osparams": cluster.osparams,
5376       "nicparams": cluster.nicparams,
5377       "ndparams": cluster.ndparams,
5378       "candidate_pool_size": cluster.candidate_pool_size,
5379       "master_netdev": cluster.master_netdev,
5380       "volume_group_name": cluster.volume_group_name,
5381       "drbd_usermode_helper": cluster.drbd_usermode_helper,
5382       "file_storage_dir": cluster.file_storage_dir,
5383       "shared_file_storage_dir": cluster.shared_file_storage_dir,
5384       "maintain_node_health": cluster.maintain_node_health,
5385       "ctime": cluster.ctime,
5386       "mtime": cluster.mtime,
5387       "uuid": cluster.uuid,
5388       "tags": list(cluster.GetTags()),
5389       "uid_pool": cluster.uid_pool,
5390       "default_iallocator": cluster.default_iallocator,
5391       "reserved_lvs": cluster.reserved_lvs,
5392       "primary_ip_version": primary_ip_version,
5393       "prealloc_wipe_disks": cluster.prealloc_wipe_disks,
5394       "hidden_os": cluster.hidden_os,
5395       "blacklisted_os": cluster.blacklisted_os,
5396       }
5397
5398     return result
5399
5400
5401 class LUClusterConfigQuery(NoHooksLU):
5402   """Return configuration values.
5403
5404   """
5405   REQ_BGL = False
5406   _FIELDS_DYNAMIC = utils.FieldSet()
5407   _FIELDS_STATIC = utils.FieldSet("cluster_name", "master_node", "drain_flag",
5408                                   "watcher_pause", "volume_group_name")
5409
5410   def CheckArguments(self):
5411     _CheckOutputFields(static=self._FIELDS_STATIC,
5412                        dynamic=self._FIELDS_DYNAMIC,
5413                        selected=self.op.output_fields)
5414
5415   def ExpandNames(self):
5416     self.needed_locks = {}
5417
5418   def Exec(self, feedback_fn):
5419     """Dump a representation of the cluster config to the standard output.
5420
5421     """
5422     values = []
5423     for field in self.op.output_fields:
5424       if field == "cluster_name":
5425         entry = self.cfg.GetClusterName()
5426       elif field == "master_node":
5427         entry = self.cfg.GetMasterNode()
5428       elif field == "drain_flag":
5429         entry = os.path.exists(constants.JOB_QUEUE_DRAIN_FILE)
5430       elif field == "watcher_pause":
5431         entry = utils.ReadWatcherPauseFile(constants.WATCHER_PAUSEFILE)
5432       elif field == "volume_group_name":
5433         entry = self.cfg.GetVGName()
5434       else:
5435         raise errors.ParameterError(field)
5436       values.append(entry)
5437     return values
5438
5439
5440 class LUInstanceActivateDisks(NoHooksLU):
5441   """Bring up an instance's disks.
5442
5443   """
5444   REQ_BGL = False
5445
5446   def ExpandNames(self):
5447     self._ExpandAndLockInstance()
5448     self.needed_locks[locking.LEVEL_NODE] = []
5449     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5450
5451   def DeclareLocks(self, level):
5452     if level == locking.LEVEL_NODE:
5453       self._LockInstancesNodes()
5454
5455   def CheckPrereq(self):
5456     """Check prerequisites.
5457
5458     This checks that the instance is in the cluster.
5459
5460     """
5461     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5462     assert self.instance is not None, \
5463       "Cannot retrieve locked instance %s" % self.op.instance_name
5464     _CheckNodeOnline(self, self.instance.primary_node)
5465
5466   def Exec(self, feedback_fn):
5467     """Activate the disks.
5468
5469     """
5470     disks_ok, disks_info = \
5471               _AssembleInstanceDisks(self, self.instance,
5472                                      ignore_size=self.op.ignore_size)
5473     if not disks_ok:
5474       raise errors.OpExecError("Cannot activate block devices")
5475
5476     return disks_info
5477
5478
5479 def _AssembleInstanceDisks(lu, instance, disks=None, ignore_secondaries=False,
5480                            ignore_size=False):
5481   """Prepare the block devices for an instance.
5482
5483   This sets up the block devices on all nodes.
5484
5485   @type lu: L{LogicalUnit}
5486   @param lu: the logical unit on whose behalf we execute
5487   @type instance: L{objects.Instance}
5488   @param instance: the instance for whose disks we assemble
5489   @type disks: list of L{objects.Disk} or None
5490   @param disks: which disks to assemble (or all, if None)
5491   @type ignore_secondaries: boolean
5492   @param ignore_secondaries: if true, errors on secondary nodes
5493       won't result in an error return from the function
5494   @type ignore_size: boolean
5495   @param ignore_size: if true, the current known size of the disk
5496       will not be used during the disk activation, useful for cases
5497       when the size is wrong
5498   @return: False if the operation failed, otherwise a list of
5499       (host, instance_visible_name, node_visible_name)
5500       with the mapping from node devices to instance devices
5501
5502   """
5503   device_info = []
5504   disks_ok = True
5505   iname = instance.name
5506   disks = _ExpandCheckDisks(instance, disks)
5507
5508   # With the two passes mechanism we try to reduce the window of
5509   # opportunity for the race condition of switching DRBD to primary
5510   # before handshaking occured, but we do not eliminate it
5511
5512   # The proper fix would be to wait (with some limits) until the
5513   # connection has been made and drbd transitions from WFConnection
5514   # into any other network-connected state (Connected, SyncTarget,
5515   # SyncSource, etc.)
5516
5517   # 1st pass, assemble on all nodes in secondary mode
5518   for idx, inst_disk in enumerate(disks):
5519     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5520       if ignore_size:
5521         node_disk = node_disk.Copy()
5522         node_disk.UnsetSize()
5523       lu.cfg.SetDiskID(node_disk, node)
5524       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, False, idx)
5525       msg = result.fail_msg
5526       if msg:
5527         lu.proc.LogWarning("Could not prepare block device %s on node %s"
5528                            " (is_primary=False, pass=1): %s",
5529                            inst_disk.iv_name, node, msg)
5530         if not ignore_secondaries:
5531           disks_ok = False
5532
5533   # FIXME: race condition on drbd migration to primary
5534
5535   # 2nd pass, do only the primary node
5536   for idx, inst_disk in enumerate(disks):
5537     dev_path = None
5538
5539     for node, node_disk in inst_disk.ComputeNodeTree(instance.primary_node):
5540       if node != instance.primary_node:
5541         continue
5542       if ignore_size:
5543         node_disk = node_disk.Copy()
5544         node_disk.UnsetSize()
5545       lu.cfg.SetDiskID(node_disk, node)
5546       result = lu.rpc.call_blockdev_assemble(node, node_disk, iname, True, idx)
5547       msg = result.fail_msg
5548       if msg:
5549         lu.proc.LogWarning("Could not prepare block device %s on node %s"
5550                            " (is_primary=True, pass=2): %s",
5551                            inst_disk.iv_name, node, msg)
5552         disks_ok = False
5553       else:
5554         dev_path = result.payload
5555
5556     device_info.append((instance.primary_node, inst_disk.iv_name, dev_path))
5557
5558   # leave the disks configured for the primary node
5559   # this is a workaround that would be fixed better by
5560   # improving the logical/physical id handling
5561   for disk in disks:
5562     lu.cfg.SetDiskID(disk, instance.primary_node)
5563
5564   return disks_ok, device_info
5565
5566
5567 def _StartInstanceDisks(lu, instance, force):
5568   """Start the disks of an instance.
5569
5570   """
5571   disks_ok, _ = _AssembleInstanceDisks(lu, instance,
5572                                            ignore_secondaries=force)
5573   if not disks_ok:
5574     _ShutdownInstanceDisks(lu, instance)
5575     if force is not None and not force:
5576       lu.proc.LogWarning("", hint="If the message above refers to a"
5577                          " secondary node,"
5578                          " you can retry the operation using '--force'.")
5579     raise errors.OpExecError("Disk consistency error")
5580
5581
5582 class LUInstanceDeactivateDisks(NoHooksLU):
5583   """Shutdown an instance's disks.
5584
5585   """
5586   REQ_BGL = False
5587
5588   def ExpandNames(self):
5589     self._ExpandAndLockInstance()
5590     self.needed_locks[locking.LEVEL_NODE] = []
5591     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
5592
5593   def DeclareLocks(self, level):
5594     if level == locking.LEVEL_NODE:
5595       self._LockInstancesNodes()
5596
5597   def CheckPrereq(self):
5598     """Check prerequisites.
5599
5600     This checks that the instance is in the cluster.
5601
5602     """
5603     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5604     assert self.instance is not None, \
5605       "Cannot retrieve locked instance %s" % self.op.instance_name
5606
5607   def Exec(self, feedback_fn):
5608     """Deactivate the disks
5609
5610     """
5611     instance = self.instance
5612     if self.op.force:
5613       _ShutdownInstanceDisks(self, instance)
5614     else:
5615       _SafeShutdownInstanceDisks(self, instance)
5616
5617
5618 def _SafeShutdownInstanceDisks(lu, instance, disks=None):
5619   """Shutdown block devices of an instance.
5620
5621   This function checks if an instance is running, before calling
5622   _ShutdownInstanceDisks.
5623
5624   """
5625   _CheckInstanceDown(lu, instance, "cannot shutdown disks")
5626   _ShutdownInstanceDisks(lu, instance, disks=disks)
5627
5628
5629 def _ExpandCheckDisks(instance, disks):
5630   """Return the instance disks selected by the disks list
5631
5632   @type disks: list of L{objects.Disk} or None
5633   @param disks: selected disks
5634   @rtype: list of L{objects.Disk}
5635   @return: selected instance disks to act on
5636
5637   """
5638   if disks is None:
5639     return instance.disks
5640   else:
5641     if not set(disks).issubset(instance.disks):
5642       raise errors.ProgrammerError("Can only act on disks belonging to the"
5643                                    " target instance")
5644     return disks
5645
5646
5647 def _ShutdownInstanceDisks(lu, instance, disks=None, ignore_primary=False):
5648   """Shutdown block devices of an instance.
5649
5650   This does the shutdown on all nodes of the instance.
5651
5652   If the ignore_primary is false, errors on the primary node are
5653   ignored.
5654
5655   """
5656   all_result = True
5657   disks = _ExpandCheckDisks(instance, disks)
5658
5659   for disk in disks:
5660     for node, top_disk in disk.ComputeNodeTree(instance.primary_node):
5661       lu.cfg.SetDiskID(top_disk, node)
5662       result = lu.rpc.call_blockdev_shutdown(node, top_disk)
5663       msg = result.fail_msg
5664       if msg:
5665         lu.LogWarning("Could not shutdown block device %s on node %s: %s",
5666                       disk.iv_name, node, msg)
5667         if ((node == instance.primary_node and not ignore_primary) or
5668             (node != instance.primary_node and not result.offline)):
5669           all_result = False
5670   return all_result
5671
5672
5673 def _CheckNodeFreeMemory(lu, node, reason, requested, hypervisor_name):
5674   """Checks if a node has enough free memory.
5675
5676   This function check if a given node has the needed amount of free
5677   memory. In case the node has less memory or we cannot get the
5678   information from the node, this function raise an OpPrereqError
5679   exception.
5680
5681   @type lu: C{LogicalUnit}
5682   @param lu: a logical unit from which we get configuration data
5683   @type node: C{str}
5684   @param node: the node to check
5685   @type reason: C{str}
5686   @param reason: string to use in the error message
5687   @type requested: C{int}
5688   @param requested: the amount of memory in MiB to check for
5689   @type hypervisor_name: C{str}
5690   @param hypervisor_name: the hypervisor to ask for memory stats
5691   @raise errors.OpPrereqError: if the node doesn't have enough memory, or
5692       we cannot check the node
5693
5694   """
5695   nodeinfo = lu.rpc.call_node_info([node], None, hypervisor_name)
5696   nodeinfo[node].Raise("Can't get data from node %s" % node,
5697                        prereq=True, ecode=errors.ECODE_ENVIRON)
5698   free_mem = nodeinfo[node].payload.get("memory_free", None)
5699   if not isinstance(free_mem, int):
5700     raise errors.OpPrereqError("Can't compute free memory on node %s, result"
5701                                " was '%s'" % (node, free_mem),
5702                                errors.ECODE_ENVIRON)
5703   if requested > free_mem:
5704     raise errors.OpPrereqError("Not enough memory on node %s for %s:"
5705                                " needed %s MiB, available %s MiB" %
5706                                (node, reason, requested, free_mem),
5707                                errors.ECODE_NORES)
5708
5709
5710 def _CheckNodesFreeDiskPerVG(lu, nodenames, req_sizes):
5711   """Checks if nodes have enough free disk space in the all VGs.
5712
5713   This function check if all given nodes have the needed amount of
5714   free disk. In case any node has less disk or we cannot get the
5715   information from the node, this function raise an OpPrereqError
5716   exception.
5717
5718   @type lu: C{LogicalUnit}
5719   @param lu: a logical unit from which we get configuration data
5720   @type nodenames: C{list}
5721   @param nodenames: the list of node names to check
5722   @type req_sizes: C{dict}
5723   @param req_sizes: the hash of vg and corresponding amount of disk in
5724       MiB to check for
5725   @raise errors.OpPrereqError: if the node doesn't have enough disk,
5726       or we cannot check the node
5727
5728   """
5729   for vg, req_size in req_sizes.items():
5730     _CheckNodesFreeDiskOnVG(lu, nodenames, vg, req_size)
5731
5732
5733 def _CheckNodesFreeDiskOnVG(lu, nodenames, vg, requested):
5734   """Checks if nodes have enough free disk space in the specified VG.
5735
5736   This function check if all given nodes have the needed amount of
5737   free disk. In case any node has less disk or we cannot get the
5738   information from the node, this function raise an OpPrereqError
5739   exception.
5740
5741   @type lu: C{LogicalUnit}
5742   @param lu: a logical unit from which we get configuration data
5743   @type nodenames: C{list}
5744   @param nodenames: the list of node names to check
5745   @type vg: C{str}
5746   @param vg: the volume group to check
5747   @type requested: C{int}
5748   @param requested: the amount of disk in MiB to check for
5749   @raise errors.OpPrereqError: if the node doesn't have enough disk,
5750       or we cannot check the node
5751
5752   """
5753   nodeinfo = lu.rpc.call_node_info(nodenames, vg, None)
5754   for node in nodenames:
5755     info = nodeinfo[node]
5756     info.Raise("Cannot get current information from node %s" % node,
5757                prereq=True, ecode=errors.ECODE_ENVIRON)
5758     vg_free = info.payload.get("vg_free", None)
5759     if not isinstance(vg_free, int):
5760       raise errors.OpPrereqError("Can't compute free disk space on node"
5761                                  " %s for vg %s, result was '%s'" %
5762                                  (node, vg, vg_free), errors.ECODE_ENVIRON)
5763     if requested > vg_free:
5764       raise errors.OpPrereqError("Not enough disk space on target node %s"
5765                                  " vg %s: required %d MiB, available %d MiB" %
5766                                  (node, vg, requested, vg_free),
5767                                  errors.ECODE_NORES)
5768
5769
5770 class LUInstanceStartup(LogicalUnit):
5771   """Starts an instance.
5772
5773   """
5774   HPATH = "instance-start"
5775   HTYPE = constants.HTYPE_INSTANCE
5776   REQ_BGL = False
5777
5778   def CheckArguments(self):
5779     # extra beparams
5780     if self.op.beparams:
5781       # fill the beparams dict
5782       utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
5783
5784   def ExpandNames(self):
5785     self._ExpandAndLockInstance()
5786
5787   def BuildHooksEnv(self):
5788     """Build hooks env.
5789
5790     This runs on master, primary and secondary nodes of the instance.
5791
5792     """
5793     env = {
5794       "FORCE": self.op.force,
5795       }
5796
5797     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5798
5799     return env
5800
5801   def BuildHooksNodes(self):
5802     """Build hooks nodes.
5803
5804     """
5805     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5806     return (nl, nl)
5807
5808   def CheckPrereq(self):
5809     """Check prerequisites.
5810
5811     This checks that the instance is in the cluster.
5812
5813     """
5814     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5815     assert self.instance is not None, \
5816       "Cannot retrieve locked instance %s" % self.op.instance_name
5817
5818     # extra hvparams
5819     if self.op.hvparams:
5820       # check hypervisor parameter syntax (locally)
5821       cluster = self.cfg.GetClusterInfo()
5822       utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
5823       filled_hvp = cluster.FillHV(instance)
5824       filled_hvp.update(self.op.hvparams)
5825       hv_type = hypervisor.GetHypervisor(instance.hypervisor)
5826       hv_type.CheckParameterSyntax(filled_hvp)
5827       _CheckHVParams(self, instance.all_nodes, instance.hypervisor, filled_hvp)
5828
5829     self.primary_offline = self.cfg.GetNodeInfo(instance.primary_node).offline
5830
5831     if self.primary_offline and self.op.ignore_offline_nodes:
5832       self.proc.LogWarning("Ignoring offline primary node")
5833
5834       if self.op.hvparams or self.op.beparams:
5835         self.proc.LogWarning("Overridden parameters are ignored")
5836     else:
5837       _CheckNodeOnline(self, instance.primary_node)
5838
5839       bep = self.cfg.GetClusterInfo().FillBE(instance)
5840
5841       # check bridges existence
5842       _CheckInstanceBridgesExist(self, instance)
5843
5844       remote_info = self.rpc.call_instance_info(instance.primary_node,
5845                                                 instance.name,
5846                                                 instance.hypervisor)
5847       remote_info.Raise("Error checking node %s" % instance.primary_node,
5848                         prereq=True, ecode=errors.ECODE_ENVIRON)
5849       if not remote_info.payload: # not running already
5850         _CheckNodeFreeMemory(self, instance.primary_node,
5851                              "starting instance %s" % instance.name,
5852                              bep[constants.BE_MEMORY], instance.hypervisor)
5853
5854   def Exec(self, feedback_fn):
5855     """Start the instance.
5856
5857     """
5858     instance = self.instance
5859     force = self.op.force
5860
5861     if not self.op.no_remember:
5862       self.cfg.MarkInstanceUp(instance.name)
5863
5864     if self.primary_offline:
5865       assert self.op.ignore_offline_nodes
5866       self.proc.LogInfo("Primary node offline, marked instance as started")
5867     else:
5868       node_current = instance.primary_node
5869
5870       _StartInstanceDisks(self, instance, force)
5871
5872       result = self.rpc.call_instance_start(node_current, instance,
5873                                             self.op.hvparams, self.op.beparams,
5874                                             self.op.startup_paused)
5875       msg = result.fail_msg
5876       if msg:
5877         _ShutdownInstanceDisks(self, instance)
5878         raise errors.OpExecError("Could not start instance: %s" % msg)
5879
5880
5881 class LUInstanceReboot(LogicalUnit):
5882   """Reboot an instance.
5883
5884   """
5885   HPATH = "instance-reboot"
5886   HTYPE = constants.HTYPE_INSTANCE
5887   REQ_BGL = False
5888
5889   def ExpandNames(self):
5890     self._ExpandAndLockInstance()
5891
5892   def BuildHooksEnv(self):
5893     """Build hooks env.
5894
5895     This runs on master, primary and secondary nodes of the instance.
5896
5897     """
5898     env = {
5899       "IGNORE_SECONDARIES": self.op.ignore_secondaries,
5900       "REBOOT_TYPE": self.op.reboot_type,
5901       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
5902       }
5903
5904     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
5905
5906     return env
5907
5908   def BuildHooksNodes(self):
5909     """Build hooks nodes.
5910
5911     """
5912     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
5913     return (nl, nl)
5914
5915   def CheckPrereq(self):
5916     """Check prerequisites.
5917
5918     This checks that the instance is in the cluster.
5919
5920     """
5921     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
5922     assert self.instance is not None, \
5923       "Cannot retrieve locked instance %s" % self.op.instance_name
5924
5925     _CheckNodeOnline(self, instance.primary_node)
5926
5927     # check bridges existence
5928     _CheckInstanceBridgesExist(self, instance)
5929
5930   def Exec(self, feedback_fn):
5931     """Reboot the instance.
5932
5933     """
5934     instance = self.instance
5935     ignore_secondaries = self.op.ignore_secondaries
5936     reboot_type = self.op.reboot_type
5937
5938     remote_info = self.rpc.call_instance_info(instance.primary_node,
5939                                               instance.name,
5940                                               instance.hypervisor)
5941     remote_info.Raise("Error checking node %s" % instance.primary_node)
5942     instance_running = bool(remote_info.payload)
5943
5944     node_current = instance.primary_node
5945
5946     if instance_running and reboot_type in [constants.INSTANCE_REBOOT_SOFT,
5947                                             constants.INSTANCE_REBOOT_HARD]:
5948       for disk in instance.disks:
5949         self.cfg.SetDiskID(disk, node_current)
5950       result = self.rpc.call_instance_reboot(node_current, instance,
5951                                              reboot_type,
5952                                              self.op.shutdown_timeout)
5953       result.Raise("Could not reboot instance")
5954     else:
5955       if instance_running:
5956         result = self.rpc.call_instance_shutdown(node_current, instance,
5957                                                  self.op.shutdown_timeout)
5958         result.Raise("Could not shutdown instance for full reboot")
5959         _ShutdownInstanceDisks(self, instance)
5960       else:
5961         self.LogInfo("Instance %s was already stopped, starting now",
5962                      instance.name)
5963       _StartInstanceDisks(self, instance, ignore_secondaries)
5964       result = self.rpc.call_instance_start(node_current, instance,
5965                                             None, None, False)
5966       msg = result.fail_msg
5967       if msg:
5968         _ShutdownInstanceDisks(self, instance)
5969         raise errors.OpExecError("Could not start instance for"
5970                                  " full reboot: %s" % msg)
5971
5972     self.cfg.MarkInstanceUp(instance.name)
5973
5974
5975 class LUInstanceShutdown(LogicalUnit):
5976   """Shutdown an instance.
5977
5978   """
5979   HPATH = "instance-stop"
5980   HTYPE = constants.HTYPE_INSTANCE
5981   REQ_BGL = False
5982
5983   def ExpandNames(self):
5984     self._ExpandAndLockInstance()
5985
5986   def BuildHooksEnv(self):
5987     """Build hooks env.
5988
5989     This runs on master, primary and secondary nodes of the instance.
5990
5991     """
5992     env = _BuildInstanceHookEnvByObject(self, self.instance)
5993     env["TIMEOUT"] = self.op.timeout
5994     return env
5995
5996   def BuildHooksNodes(self):
5997     """Build hooks nodes.
5998
5999     """
6000     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6001     return (nl, nl)
6002
6003   def CheckPrereq(self):
6004     """Check prerequisites.
6005
6006     This checks that the instance is in the cluster.
6007
6008     """
6009     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6010     assert self.instance is not None, \
6011       "Cannot retrieve locked instance %s" % self.op.instance_name
6012
6013     self.primary_offline = \
6014       self.cfg.GetNodeInfo(self.instance.primary_node).offline
6015
6016     if self.primary_offline and self.op.ignore_offline_nodes:
6017       self.proc.LogWarning("Ignoring offline primary node")
6018     else:
6019       _CheckNodeOnline(self, self.instance.primary_node)
6020
6021   def Exec(self, feedback_fn):
6022     """Shutdown the instance.
6023
6024     """
6025     instance = self.instance
6026     node_current = instance.primary_node
6027     timeout = self.op.timeout
6028
6029     if not self.op.no_remember:
6030       self.cfg.MarkInstanceDown(instance.name)
6031
6032     if self.primary_offline:
6033       assert self.op.ignore_offline_nodes
6034       self.proc.LogInfo("Primary node offline, marked instance as stopped")
6035     else:
6036       result = self.rpc.call_instance_shutdown(node_current, instance, timeout)
6037       msg = result.fail_msg
6038       if msg:
6039         self.proc.LogWarning("Could not shutdown instance: %s" % msg)
6040
6041       _ShutdownInstanceDisks(self, instance)
6042
6043
6044 class LUInstanceReinstall(LogicalUnit):
6045   """Reinstall an instance.
6046
6047   """
6048   HPATH = "instance-reinstall"
6049   HTYPE = constants.HTYPE_INSTANCE
6050   REQ_BGL = False
6051
6052   def ExpandNames(self):
6053     self._ExpandAndLockInstance()
6054
6055   def BuildHooksEnv(self):
6056     """Build hooks env.
6057
6058     This runs on master, primary and secondary nodes of the instance.
6059
6060     """
6061     return _BuildInstanceHookEnvByObject(self, self.instance)
6062
6063   def BuildHooksNodes(self):
6064     """Build hooks nodes.
6065
6066     """
6067     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6068     return (nl, nl)
6069
6070   def CheckPrereq(self):
6071     """Check prerequisites.
6072
6073     This checks that the instance is in the cluster and is not running.
6074
6075     """
6076     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6077     assert instance is not None, \
6078       "Cannot retrieve locked instance %s" % self.op.instance_name
6079     _CheckNodeOnline(self, instance.primary_node, "Instance primary node"
6080                      " offline, cannot reinstall")
6081     for node in instance.secondary_nodes:
6082       _CheckNodeOnline(self, node, "Instance secondary node offline,"
6083                        " cannot reinstall")
6084
6085     if instance.disk_template == constants.DT_DISKLESS:
6086       raise errors.OpPrereqError("Instance '%s' has no disks" %
6087                                  self.op.instance_name,
6088                                  errors.ECODE_INVAL)
6089     _CheckInstanceDown(self, instance, "cannot reinstall")
6090
6091     if self.op.os_type is not None:
6092       # OS verification
6093       pnode = _ExpandNodeName(self.cfg, instance.primary_node)
6094       _CheckNodeHasOS(self, pnode, self.op.os_type, self.op.force_variant)
6095       instance_os = self.op.os_type
6096     else:
6097       instance_os = instance.os
6098
6099     nodelist = list(instance.all_nodes)
6100
6101     if self.op.osparams:
6102       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
6103       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
6104       self.os_inst = i_osdict # the new dict (without defaults)
6105     else:
6106       self.os_inst = None
6107
6108     self.instance = instance
6109
6110   def Exec(self, feedback_fn):
6111     """Reinstall the instance.
6112
6113     """
6114     inst = self.instance
6115
6116     if self.op.os_type is not None:
6117       feedback_fn("Changing OS to '%s'..." % self.op.os_type)
6118       inst.os = self.op.os_type
6119       # Write to configuration
6120       self.cfg.Update(inst, feedback_fn)
6121
6122     _StartInstanceDisks(self, inst, None)
6123     try:
6124       feedback_fn("Running the instance OS create scripts...")
6125       # FIXME: pass debug option from opcode to backend
6126       result = self.rpc.call_instance_os_add(inst.primary_node, inst, True,
6127                                              self.op.debug_level,
6128                                              osparams=self.os_inst)
6129       result.Raise("Could not install OS for instance %s on node %s" %
6130                    (inst.name, inst.primary_node))
6131     finally:
6132       _ShutdownInstanceDisks(self, inst)
6133
6134
6135 class LUInstanceRecreateDisks(LogicalUnit):
6136   """Recreate an instance's missing disks.
6137
6138   """
6139   HPATH = "instance-recreate-disks"
6140   HTYPE = constants.HTYPE_INSTANCE
6141   REQ_BGL = False
6142
6143   def CheckArguments(self):
6144     # normalise the disk list
6145     self.op.disks = sorted(frozenset(self.op.disks))
6146
6147   def ExpandNames(self):
6148     self._ExpandAndLockInstance()
6149     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6150     if self.op.nodes:
6151       self.op.nodes = [_ExpandNodeName(self.cfg, n) for n in self.op.nodes]
6152       self.needed_locks[locking.LEVEL_NODE] = list(self.op.nodes)
6153     else:
6154       self.needed_locks[locking.LEVEL_NODE] = []
6155
6156   def DeclareLocks(self, level):
6157     if level == locking.LEVEL_NODE:
6158       # if we replace the nodes, we only need to lock the old primary,
6159       # otherwise we need to lock all nodes for disk re-creation
6160       primary_only = bool(self.op.nodes)
6161       self._LockInstancesNodes(primary_only=primary_only)
6162
6163   def BuildHooksEnv(self):
6164     """Build hooks env.
6165
6166     This runs on master, primary and secondary nodes of the instance.
6167
6168     """
6169     return _BuildInstanceHookEnvByObject(self, self.instance)
6170
6171   def BuildHooksNodes(self):
6172     """Build hooks nodes.
6173
6174     """
6175     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6176     return (nl, nl)
6177
6178   def CheckPrereq(self):
6179     """Check prerequisites.
6180
6181     This checks that the instance is in the cluster and is not running.
6182
6183     """
6184     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6185     assert instance is not None, \
6186       "Cannot retrieve locked instance %s" % self.op.instance_name
6187     if self.op.nodes:
6188       if len(self.op.nodes) != len(instance.all_nodes):
6189         raise errors.OpPrereqError("Instance %s currently has %d nodes, but"
6190                                    " %d replacement nodes were specified" %
6191                                    (instance.name, len(instance.all_nodes),
6192                                     len(self.op.nodes)),
6193                                    errors.ECODE_INVAL)
6194       assert instance.disk_template != constants.DT_DRBD8 or \
6195           len(self.op.nodes) == 2
6196       assert instance.disk_template != constants.DT_PLAIN or \
6197           len(self.op.nodes) == 1
6198       primary_node = self.op.nodes[0]
6199     else:
6200       primary_node = instance.primary_node
6201     _CheckNodeOnline(self, primary_node)
6202
6203     if instance.disk_template == constants.DT_DISKLESS:
6204       raise errors.OpPrereqError("Instance '%s' has no disks" %
6205                                  self.op.instance_name, errors.ECODE_INVAL)
6206     # if we replace nodes *and* the old primary is offline, we don't
6207     # check
6208     assert instance.primary_node in self.needed_locks[locking.LEVEL_NODE]
6209     old_pnode = self.cfg.GetNodeInfo(instance.primary_node)
6210     if not (self.op.nodes and old_pnode.offline):
6211       _CheckInstanceDown(self, instance, "cannot recreate disks")
6212
6213     if not self.op.disks:
6214       self.op.disks = range(len(instance.disks))
6215     else:
6216       for idx in self.op.disks:
6217         if idx >= len(instance.disks):
6218           raise errors.OpPrereqError("Invalid disk index '%s'" % idx,
6219                                      errors.ECODE_INVAL)
6220     if self.op.disks != range(len(instance.disks)) and self.op.nodes:
6221       raise errors.OpPrereqError("Can't recreate disks partially and"
6222                                  " change the nodes at the same time",
6223                                  errors.ECODE_INVAL)
6224     self.instance = instance
6225
6226   def Exec(self, feedback_fn):
6227     """Recreate the disks.
6228
6229     """
6230     instance = self.instance
6231
6232     to_skip = []
6233     mods = [] # keeps track of needed logical_id changes
6234
6235     for idx, disk in enumerate(instance.disks):
6236       if idx not in self.op.disks: # disk idx has not been passed in
6237         to_skip.append(idx)
6238         continue
6239       # update secondaries for disks, if needed
6240       if self.op.nodes:
6241         if disk.dev_type == constants.LD_DRBD8:
6242           # need to update the nodes and minors
6243           assert len(self.op.nodes) == 2
6244           assert len(disk.logical_id) == 6 # otherwise disk internals
6245                                            # have changed
6246           (_, _, old_port, _, _, old_secret) = disk.logical_id
6247           new_minors = self.cfg.AllocateDRBDMinor(self.op.nodes, instance.name)
6248           new_id = (self.op.nodes[0], self.op.nodes[1], old_port,
6249                     new_minors[0], new_minors[1], old_secret)
6250           assert len(disk.logical_id) == len(new_id)
6251           mods.append((idx, new_id))
6252
6253     # now that we have passed all asserts above, we can apply the mods
6254     # in a single run (to avoid partial changes)
6255     for idx, new_id in mods:
6256       instance.disks[idx].logical_id = new_id
6257
6258     # change primary node, if needed
6259     if self.op.nodes:
6260       instance.primary_node = self.op.nodes[0]
6261       self.LogWarning("Changing the instance's nodes, you will have to"
6262                       " remove any disks left on the older nodes manually")
6263
6264     if self.op.nodes:
6265       self.cfg.Update(instance, feedback_fn)
6266
6267     _CreateDisks(self, instance, to_skip=to_skip)
6268
6269
6270 class LUInstanceRename(LogicalUnit):
6271   """Rename an instance.
6272
6273   """
6274   HPATH = "instance-rename"
6275   HTYPE = constants.HTYPE_INSTANCE
6276
6277   def CheckArguments(self):
6278     """Check arguments.
6279
6280     """
6281     if self.op.ip_check and not self.op.name_check:
6282       # TODO: make the ip check more flexible and not depend on the name check
6283       raise errors.OpPrereqError("IP address check requires a name check",
6284                                  errors.ECODE_INVAL)
6285
6286   def BuildHooksEnv(self):
6287     """Build hooks env.
6288
6289     This runs on master, primary and secondary nodes of the instance.
6290
6291     """
6292     env = _BuildInstanceHookEnvByObject(self, self.instance)
6293     env["INSTANCE_NEW_NAME"] = self.op.new_name
6294     return env
6295
6296   def BuildHooksNodes(self):
6297     """Build hooks nodes.
6298
6299     """
6300     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
6301     return (nl, nl)
6302
6303   def CheckPrereq(self):
6304     """Check prerequisites.
6305
6306     This checks that the instance is in the cluster and is not running.
6307
6308     """
6309     self.op.instance_name = _ExpandInstanceName(self.cfg,
6310                                                 self.op.instance_name)
6311     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6312     assert instance is not None
6313     _CheckNodeOnline(self, instance.primary_node)
6314     _CheckInstanceDown(self, instance, "cannot rename")
6315     self.instance = instance
6316
6317     new_name = self.op.new_name
6318     if self.op.name_check:
6319       hostname = netutils.GetHostname(name=new_name)
6320       if hostname != new_name:
6321         self.LogInfo("Resolved given name '%s' to '%s'", new_name,
6322                      hostname.name)
6323       if not utils.MatchNameComponent(self.op.new_name, [hostname.name]):
6324         raise errors.OpPrereqError(("Resolved hostname '%s' does not look the"
6325                                     " same as given hostname '%s'") %
6326                                     (hostname.name, self.op.new_name),
6327                                     errors.ECODE_INVAL)
6328       new_name = self.op.new_name = hostname.name
6329       if (self.op.ip_check and
6330           netutils.TcpPing(hostname.ip, constants.DEFAULT_NODED_PORT)):
6331         raise errors.OpPrereqError("IP %s of instance %s already in use" %
6332                                    (hostname.ip, new_name),
6333                                    errors.ECODE_NOTUNIQUE)
6334
6335     instance_list = self.cfg.GetInstanceList()
6336     if new_name in instance_list and new_name != instance.name:
6337       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
6338                                  new_name, errors.ECODE_EXISTS)
6339
6340   def Exec(self, feedback_fn):
6341     """Rename the instance.
6342
6343     """
6344     inst = self.instance
6345     old_name = inst.name
6346
6347     rename_file_storage = False
6348     if (inst.disk_template in constants.DTS_FILEBASED and
6349         self.op.new_name != inst.name):
6350       old_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6351       rename_file_storage = True
6352
6353     self.cfg.RenameInstance(inst.name, self.op.new_name)
6354     # Change the instance lock. This is definitely safe while we hold the BGL.
6355     # Otherwise the new lock would have to be added in acquired mode.
6356     assert self.REQ_BGL
6357     self.glm.remove(locking.LEVEL_INSTANCE, old_name)
6358     self.glm.add(locking.LEVEL_INSTANCE, self.op.new_name)
6359
6360     # re-read the instance from the configuration after rename
6361     inst = self.cfg.GetInstanceInfo(self.op.new_name)
6362
6363     if rename_file_storage:
6364       new_file_storage_dir = os.path.dirname(inst.disks[0].logical_id[1])
6365       result = self.rpc.call_file_storage_dir_rename(inst.primary_node,
6366                                                      old_file_storage_dir,
6367                                                      new_file_storage_dir)
6368       result.Raise("Could not rename on node %s directory '%s' to '%s'"
6369                    " (but the instance has been renamed in Ganeti)" %
6370                    (inst.primary_node, old_file_storage_dir,
6371                     new_file_storage_dir))
6372
6373     _StartInstanceDisks(self, inst, None)
6374     try:
6375       result = self.rpc.call_instance_run_rename(inst.primary_node, inst,
6376                                                  old_name, self.op.debug_level)
6377       msg = result.fail_msg
6378       if msg:
6379         msg = ("Could not run OS rename script for instance %s on node %s"
6380                " (but the instance has been renamed in Ganeti): %s" %
6381                (inst.name, inst.primary_node, msg))
6382         self.proc.LogWarning(msg)
6383     finally:
6384       _ShutdownInstanceDisks(self, inst)
6385
6386     return inst.name
6387
6388
6389 class LUInstanceRemove(LogicalUnit):
6390   """Remove an instance.
6391
6392   """
6393   HPATH = "instance-remove"
6394   HTYPE = constants.HTYPE_INSTANCE
6395   REQ_BGL = False
6396
6397   def ExpandNames(self):
6398     self._ExpandAndLockInstance()
6399     self.needed_locks[locking.LEVEL_NODE] = []
6400     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6401
6402   def DeclareLocks(self, level):
6403     if level == locking.LEVEL_NODE:
6404       self._LockInstancesNodes()
6405
6406   def BuildHooksEnv(self):
6407     """Build hooks env.
6408
6409     This runs on master, primary and secondary nodes of the instance.
6410
6411     """
6412     env = _BuildInstanceHookEnvByObject(self, self.instance)
6413     env["SHUTDOWN_TIMEOUT"] = self.op.shutdown_timeout
6414     return env
6415
6416   def BuildHooksNodes(self):
6417     """Build hooks nodes.
6418
6419     """
6420     nl = [self.cfg.GetMasterNode()]
6421     nl_post = list(self.instance.all_nodes) + nl
6422     return (nl, nl_post)
6423
6424   def CheckPrereq(self):
6425     """Check prerequisites.
6426
6427     This checks that the instance is in the cluster.
6428
6429     """
6430     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6431     assert self.instance is not None, \
6432       "Cannot retrieve locked instance %s" % self.op.instance_name
6433
6434   def Exec(self, feedback_fn):
6435     """Remove the instance.
6436
6437     """
6438     instance = self.instance
6439     logging.info("Shutting down instance %s on node %s",
6440                  instance.name, instance.primary_node)
6441
6442     result = self.rpc.call_instance_shutdown(instance.primary_node, instance,
6443                                              self.op.shutdown_timeout)
6444     msg = result.fail_msg
6445     if msg:
6446       if self.op.ignore_failures:
6447         feedback_fn("Warning: can't shutdown instance: %s" % msg)
6448       else:
6449         raise errors.OpExecError("Could not shutdown instance %s on"
6450                                  " node %s: %s" %
6451                                  (instance.name, instance.primary_node, msg))
6452
6453     _RemoveInstance(self, feedback_fn, instance, self.op.ignore_failures)
6454
6455
6456 def _RemoveInstance(lu, feedback_fn, instance, ignore_failures):
6457   """Utility function to remove an instance.
6458
6459   """
6460   logging.info("Removing block devices for instance %s", instance.name)
6461
6462   if not _RemoveDisks(lu, instance):
6463     if not ignore_failures:
6464       raise errors.OpExecError("Can't remove instance's disks")
6465     feedback_fn("Warning: can't remove instance's disks")
6466
6467   logging.info("Removing instance %s out of cluster config", instance.name)
6468
6469   lu.cfg.RemoveInstance(instance.name)
6470
6471   assert not lu.remove_locks.get(locking.LEVEL_INSTANCE), \
6472     "Instance lock removal conflict"
6473
6474   # Remove lock for the instance
6475   lu.remove_locks[locking.LEVEL_INSTANCE] = instance.name
6476
6477
6478 class LUInstanceQuery(NoHooksLU):
6479   """Logical unit for querying instances.
6480
6481   """
6482   # pylint: disable-msg=W0142
6483   REQ_BGL = False
6484
6485   def CheckArguments(self):
6486     self.iq = _InstanceQuery(qlang.MakeSimpleFilter("name", self.op.names),
6487                              self.op.output_fields, self.op.use_locking)
6488
6489   def ExpandNames(self):
6490     self.iq.ExpandNames(self)
6491
6492   def DeclareLocks(self, level):
6493     self.iq.DeclareLocks(self, level)
6494
6495   def Exec(self, feedback_fn):
6496     return self.iq.OldStyleQuery(self)
6497
6498
6499 class LUInstanceFailover(LogicalUnit):
6500   """Failover an instance.
6501
6502   """
6503   HPATH = "instance-failover"
6504   HTYPE = constants.HTYPE_INSTANCE
6505   REQ_BGL = False
6506
6507   def CheckArguments(self):
6508     """Check the arguments.
6509
6510     """
6511     self.iallocator = getattr(self.op, "iallocator", None)
6512     self.target_node = getattr(self.op, "target_node", None)
6513
6514   def ExpandNames(self):
6515     self._ExpandAndLockInstance()
6516
6517     if self.op.target_node is not None:
6518       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6519
6520     self.needed_locks[locking.LEVEL_NODE] = []
6521     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6522
6523     ignore_consistency = self.op.ignore_consistency
6524     shutdown_timeout = self.op.shutdown_timeout
6525     self._migrater = TLMigrateInstance(self, self.op.instance_name,
6526                                        cleanup=False,
6527                                        failover=True,
6528                                        ignore_consistency=ignore_consistency,
6529                                        shutdown_timeout=shutdown_timeout)
6530     self.tasklets = [self._migrater]
6531
6532   def DeclareLocks(self, level):
6533     if level == locking.LEVEL_NODE:
6534       instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6535       if instance.disk_template in constants.DTS_EXT_MIRROR:
6536         if self.op.target_node is None:
6537           self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6538         else:
6539           self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6540                                                    self.op.target_node]
6541         del self.recalculate_locks[locking.LEVEL_NODE]
6542       else:
6543         self._LockInstancesNodes()
6544
6545   def BuildHooksEnv(self):
6546     """Build hooks env.
6547
6548     This runs on master, primary and secondary nodes of the instance.
6549
6550     """
6551     instance = self._migrater.instance
6552     source_node = instance.primary_node
6553     target_node = self.op.target_node
6554     env = {
6555       "IGNORE_CONSISTENCY": self.op.ignore_consistency,
6556       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6557       "OLD_PRIMARY": source_node,
6558       "NEW_PRIMARY": target_node,
6559       }
6560
6561     if instance.disk_template in constants.DTS_INT_MIRROR:
6562       env["OLD_SECONDARY"] = instance.secondary_nodes[0]
6563       env["NEW_SECONDARY"] = source_node
6564     else:
6565       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = ""
6566
6567     env.update(_BuildInstanceHookEnvByObject(self, instance))
6568
6569     return env
6570
6571   def BuildHooksNodes(self):
6572     """Build hooks nodes.
6573
6574     """
6575     instance = self._migrater.instance
6576     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6577     return (nl, nl + [instance.primary_node])
6578
6579
6580 class LUInstanceMigrate(LogicalUnit):
6581   """Migrate an instance.
6582
6583   This is migration without shutting down, compared to the failover,
6584   which is done with shutdown.
6585
6586   """
6587   HPATH = "instance-migrate"
6588   HTYPE = constants.HTYPE_INSTANCE
6589   REQ_BGL = False
6590
6591   def ExpandNames(self):
6592     self._ExpandAndLockInstance()
6593
6594     if self.op.target_node is not None:
6595       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6596
6597     self.needed_locks[locking.LEVEL_NODE] = []
6598     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
6599
6600     self._migrater = TLMigrateInstance(self, self.op.instance_name,
6601                                        cleanup=self.op.cleanup,
6602                                        failover=False,
6603                                        fallback=self.op.allow_failover)
6604     self.tasklets = [self._migrater]
6605
6606   def DeclareLocks(self, level):
6607     if level == locking.LEVEL_NODE:
6608       instance = self.context.cfg.GetInstanceInfo(self.op.instance_name)
6609       if instance.disk_template in constants.DTS_EXT_MIRROR:
6610         if self.op.target_node is None:
6611           self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
6612         else:
6613           self.needed_locks[locking.LEVEL_NODE] = [instance.primary_node,
6614                                                    self.op.target_node]
6615         del self.recalculate_locks[locking.LEVEL_NODE]
6616       else:
6617         self._LockInstancesNodes()
6618
6619   def BuildHooksEnv(self):
6620     """Build hooks env.
6621
6622     This runs on master, primary and secondary nodes of the instance.
6623
6624     """
6625     instance = self._migrater.instance
6626     source_node = instance.primary_node
6627     target_node = self.op.target_node
6628     env = _BuildInstanceHookEnvByObject(self, instance)
6629     env.update({
6630       "MIGRATE_LIVE": self._migrater.live,
6631       "MIGRATE_CLEANUP": self.op.cleanup,
6632       "OLD_PRIMARY": source_node,
6633       "NEW_PRIMARY": target_node,
6634       })
6635
6636     if instance.disk_template in constants.DTS_INT_MIRROR:
6637       env["OLD_SECONDARY"] = target_node
6638       env["NEW_SECONDARY"] = source_node
6639     else:
6640       env["OLD_SECONDARY"] = env["NEW_SECONDARY"] = None
6641
6642     return env
6643
6644   def BuildHooksNodes(self):
6645     """Build hooks nodes.
6646
6647     """
6648     instance = self._migrater.instance
6649     nl = [self.cfg.GetMasterNode()] + list(instance.secondary_nodes)
6650     return (nl, nl + [instance.primary_node])
6651
6652
6653 class LUInstanceMove(LogicalUnit):
6654   """Move an instance by data-copying.
6655
6656   """
6657   HPATH = "instance-move"
6658   HTYPE = constants.HTYPE_INSTANCE
6659   REQ_BGL = False
6660
6661   def ExpandNames(self):
6662     self._ExpandAndLockInstance()
6663     target_node = _ExpandNodeName(self.cfg, self.op.target_node)
6664     self.op.target_node = target_node
6665     self.needed_locks[locking.LEVEL_NODE] = [target_node]
6666     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
6667
6668   def DeclareLocks(self, level):
6669     if level == locking.LEVEL_NODE:
6670       self._LockInstancesNodes(primary_only=True)
6671
6672   def BuildHooksEnv(self):
6673     """Build hooks env.
6674
6675     This runs on master, primary and secondary nodes of the instance.
6676
6677     """
6678     env = {
6679       "TARGET_NODE": self.op.target_node,
6680       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
6681       }
6682     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
6683     return env
6684
6685   def BuildHooksNodes(self):
6686     """Build hooks nodes.
6687
6688     """
6689     nl = [
6690       self.cfg.GetMasterNode(),
6691       self.instance.primary_node,
6692       self.op.target_node,
6693       ]
6694     return (nl, nl)
6695
6696   def CheckPrereq(self):
6697     """Check prerequisites.
6698
6699     This checks that the instance is in the cluster.
6700
6701     """
6702     self.instance = instance = self.cfg.GetInstanceInfo(self.op.instance_name)
6703     assert self.instance is not None, \
6704       "Cannot retrieve locked instance %s" % self.op.instance_name
6705
6706     node = self.cfg.GetNodeInfo(self.op.target_node)
6707     assert node is not None, \
6708       "Cannot retrieve locked node %s" % self.op.target_node
6709
6710     self.target_node = target_node = node.name
6711
6712     if target_node == instance.primary_node:
6713       raise errors.OpPrereqError("Instance %s is already on the node %s" %
6714                                  (instance.name, target_node),
6715                                  errors.ECODE_STATE)
6716
6717     bep = self.cfg.GetClusterInfo().FillBE(instance)
6718
6719     for idx, dsk in enumerate(instance.disks):
6720       if dsk.dev_type not in (constants.LD_LV, constants.LD_FILE):
6721         raise errors.OpPrereqError("Instance disk %d has a complex layout,"
6722                                    " cannot copy" % idx, errors.ECODE_STATE)
6723
6724     _CheckNodeOnline(self, target_node)
6725     _CheckNodeNotDrained(self, target_node)
6726     _CheckNodeVmCapable(self, target_node)
6727
6728     if instance.admin_up:
6729       # check memory requirements on the secondary node
6730       _CheckNodeFreeMemory(self, target_node, "failing over instance %s" %
6731                            instance.name, bep[constants.BE_MEMORY],
6732                            instance.hypervisor)
6733     else:
6734       self.LogInfo("Not checking memory on the secondary node as"
6735                    " instance will not be started")
6736
6737     # check bridge existance
6738     _CheckInstanceBridgesExist(self, instance, node=target_node)
6739
6740   def Exec(self, feedback_fn):
6741     """Move an instance.
6742
6743     The move is done by shutting it down on its present node, copying
6744     the data over (slow) and starting it on the new node.
6745
6746     """
6747     instance = self.instance
6748
6749     source_node = instance.primary_node
6750     target_node = self.target_node
6751
6752     self.LogInfo("Shutting down instance %s on source node %s",
6753                  instance.name, source_node)
6754
6755     result = self.rpc.call_instance_shutdown(source_node, instance,
6756                                              self.op.shutdown_timeout)
6757     msg = result.fail_msg
6758     if msg:
6759       if self.op.ignore_consistency:
6760         self.proc.LogWarning("Could not shutdown instance %s on node %s."
6761                              " Proceeding anyway. Please make sure node"
6762                              " %s is down. Error details: %s",
6763                              instance.name, source_node, source_node, msg)
6764       else:
6765         raise errors.OpExecError("Could not shutdown instance %s on"
6766                                  " node %s: %s" %
6767                                  (instance.name, source_node, msg))
6768
6769     # create the target disks
6770     try:
6771       _CreateDisks(self, instance, target_node=target_node)
6772     except errors.OpExecError:
6773       self.LogWarning("Device creation failed, reverting...")
6774       try:
6775         _RemoveDisks(self, instance, target_node=target_node)
6776       finally:
6777         self.cfg.ReleaseDRBDMinors(instance.name)
6778         raise
6779
6780     cluster_name = self.cfg.GetClusterInfo().cluster_name
6781
6782     errs = []
6783     # activate, get path, copy the data over
6784     for idx, disk in enumerate(instance.disks):
6785       self.LogInfo("Copying data for disk %d", idx)
6786       result = self.rpc.call_blockdev_assemble(target_node, disk,
6787                                                instance.name, True, idx)
6788       if result.fail_msg:
6789         self.LogWarning("Can't assemble newly created disk %d: %s",
6790                         idx, result.fail_msg)
6791         errs.append(result.fail_msg)
6792         break
6793       dev_path = result.payload
6794       result = self.rpc.call_blockdev_export(source_node, disk,
6795                                              target_node, dev_path,
6796                                              cluster_name)
6797       if result.fail_msg:
6798         self.LogWarning("Can't copy data over for disk %d: %s",
6799                         idx, result.fail_msg)
6800         errs.append(result.fail_msg)
6801         break
6802
6803     if errs:
6804       self.LogWarning("Some disks failed to copy, aborting")
6805       try:
6806         _RemoveDisks(self, instance, target_node=target_node)
6807       finally:
6808         self.cfg.ReleaseDRBDMinors(instance.name)
6809         raise errors.OpExecError("Errors during disk copy: %s" %
6810                                  (",".join(errs),))
6811
6812     instance.primary_node = target_node
6813     self.cfg.Update(instance, feedback_fn)
6814
6815     self.LogInfo("Removing the disks on the original node")
6816     _RemoveDisks(self, instance, target_node=source_node)
6817
6818     # Only start the instance if it's marked as up
6819     if instance.admin_up:
6820       self.LogInfo("Starting instance %s on node %s",
6821                    instance.name, target_node)
6822
6823       disks_ok, _ = _AssembleInstanceDisks(self, instance,
6824                                            ignore_secondaries=True)
6825       if not disks_ok:
6826         _ShutdownInstanceDisks(self, instance)
6827         raise errors.OpExecError("Can't activate the instance's disks")
6828
6829       result = self.rpc.call_instance_start(target_node, instance,
6830                                             None, None, False)
6831       msg = result.fail_msg
6832       if msg:
6833         _ShutdownInstanceDisks(self, instance)
6834         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
6835                                  (instance.name, target_node, msg))
6836
6837
6838 class LUNodeMigrate(LogicalUnit):
6839   """Migrate all instances from a node.
6840
6841   """
6842   HPATH = "node-migrate"
6843   HTYPE = constants.HTYPE_NODE
6844   REQ_BGL = False
6845
6846   def CheckArguments(self):
6847     pass
6848
6849   def ExpandNames(self):
6850     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
6851
6852     self.share_locks = _ShareAll()
6853     self.needed_locks = {
6854       locking.LEVEL_NODE: [self.op.node_name],
6855       }
6856
6857   def BuildHooksEnv(self):
6858     """Build hooks env.
6859
6860     This runs on the master, the primary and all the secondaries.
6861
6862     """
6863     return {
6864       "NODE_NAME": self.op.node_name,
6865       }
6866
6867   def BuildHooksNodes(self):
6868     """Build hooks nodes.
6869
6870     """
6871     nl = [self.cfg.GetMasterNode()]
6872     return (nl, nl)
6873
6874   def CheckPrereq(self):
6875     pass
6876
6877   def Exec(self, feedback_fn):
6878     # Prepare jobs for migration instances
6879     jobs = [
6880       [opcodes.OpInstanceMigrate(instance_name=inst.name,
6881                                  mode=self.op.mode,
6882                                  live=self.op.live,
6883                                  iallocator=self.op.iallocator,
6884                                  target_node=self.op.target_node)]
6885       for inst in _GetNodePrimaryInstances(self.cfg, self.op.node_name)
6886       ]
6887
6888     # TODO: Run iallocator in this opcode and pass correct placement options to
6889     # OpInstanceMigrate. Since other jobs can modify the cluster between
6890     # running the iallocator and the actual migration, a good consistency model
6891     # will have to be found.
6892
6893     assert (frozenset(self.glm.list_owned(locking.LEVEL_NODE)) ==
6894             frozenset([self.op.node_name]))
6895
6896     return ResultWithJobs(jobs)
6897
6898
6899 class TLMigrateInstance(Tasklet):
6900   """Tasklet class for instance migration.
6901
6902   @type live: boolean
6903   @ivar live: whether the migration will be done live or non-live;
6904       this variable is initalized only after CheckPrereq has run
6905   @type cleanup: boolean
6906   @ivar cleanup: Wheater we cleanup from a failed migration
6907   @type iallocator: string
6908   @ivar iallocator: The iallocator used to determine target_node
6909   @type target_node: string
6910   @ivar target_node: If given, the target_node to reallocate the instance to
6911   @type failover: boolean
6912   @ivar failover: Whether operation results in failover or migration
6913   @type fallback: boolean
6914   @ivar fallback: Whether fallback to failover is allowed if migration not
6915                   possible
6916   @type ignore_consistency: boolean
6917   @ivar ignore_consistency: Wheter we should ignore consistency between source
6918                             and target node
6919   @type shutdown_timeout: int
6920   @ivar shutdown_timeout: In case of failover timeout of the shutdown
6921
6922   """
6923   def __init__(self, lu, instance_name, cleanup=False,
6924                failover=False, fallback=False,
6925                ignore_consistency=False,
6926                shutdown_timeout=constants.DEFAULT_SHUTDOWN_TIMEOUT):
6927     """Initializes this class.
6928
6929     """
6930     Tasklet.__init__(self, lu)
6931
6932     # Parameters
6933     self.instance_name = instance_name
6934     self.cleanup = cleanup
6935     self.live = False # will be overridden later
6936     self.failover = failover
6937     self.fallback = fallback
6938     self.ignore_consistency = ignore_consistency
6939     self.shutdown_timeout = shutdown_timeout
6940
6941   def CheckPrereq(self):
6942     """Check prerequisites.
6943
6944     This checks that the instance is in the cluster.
6945
6946     """
6947     instance_name = _ExpandInstanceName(self.lu.cfg, self.instance_name)
6948     instance = self.cfg.GetInstanceInfo(instance_name)
6949     assert instance is not None
6950     self.instance = instance
6951
6952     if (not self.cleanup and not instance.admin_up and not self.failover and
6953         self.fallback):
6954       self.lu.LogInfo("Instance is marked down, fallback allowed, switching"
6955                       " to failover")
6956       self.failover = True
6957
6958     if instance.disk_template not in constants.DTS_MIRRORED:
6959       if self.failover:
6960         text = "failovers"
6961       else:
6962         text = "migrations"
6963       raise errors.OpPrereqError("Instance's disk layout '%s' does not allow"
6964                                  " %s" % (instance.disk_template, text),
6965                                  errors.ECODE_STATE)
6966
6967     if instance.disk_template in constants.DTS_EXT_MIRROR:
6968       _CheckIAllocatorOrNode(self.lu, "iallocator", "target_node")
6969
6970       if self.lu.op.iallocator:
6971         self._RunAllocator()
6972       else:
6973         # We set set self.target_node as it is required by
6974         # BuildHooksEnv
6975         self.target_node = self.lu.op.target_node
6976
6977       # self.target_node is already populated, either directly or by the
6978       # iallocator run
6979       target_node = self.target_node
6980       if self.target_node == instance.primary_node:
6981         raise errors.OpPrereqError("Cannot migrate instance %s"
6982                                    " to its primary (%s)" %
6983                                    (instance.name, instance.primary_node))
6984
6985       if len(self.lu.tasklets) == 1:
6986         # It is safe to release locks only when we're the only tasklet
6987         # in the LU
6988         _ReleaseLocks(self.lu, locking.LEVEL_NODE,
6989                       keep=[instance.primary_node, self.target_node])
6990
6991     else:
6992       secondary_nodes = instance.secondary_nodes
6993       if not secondary_nodes:
6994         raise errors.ConfigurationError("No secondary node but using"
6995                                         " %s disk template" %
6996                                         instance.disk_template)
6997       target_node = secondary_nodes[0]
6998       if self.lu.op.iallocator or (self.lu.op.target_node and
6999                                    self.lu.op.target_node != target_node):
7000         if self.failover:
7001           text = "failed over"
7002         else:
7003           text = "migrated"
7004         raise errors.OpPrereqError("Instances with disk template %s cannot"
7005                                    " be %s to arbitrary nodes"
7006                                    " (neither an iallocator nor a target"
7007                                    " node can be passed)" %
7008                                    (instance.disk_template, text),
7009                                    errors.ECODE_INVAL)
7010
7011     i_be = self.cfg.GetClusterInfo().FillBE(instance)
7012
7013     # check memory requirements on the secondary node
7014     if not self.failover or instance.admin_up:
7015       _CheckNodeFreeMemory(self.lu, target_node, "migrating instance %s" %
7016                            instance.name, i_be[constants.BE_MEMORY],
7017                            instance.hypervisor)
7018     else:
7019       self.lu.LogInfo("Not checking memory on the secondary node as"
7020                       " instance will not be started")
7021
7022     # check bridge existance
7023     _CheckInstanceBridgesExist(self.lu, instance, node=target_node)
7024
7025     if not self.cleanup:
7026       _CheckNodeNotDrained(self.lu, target_node)
7027       if not self.failover:
7028         result = self.rpc.call_instance_migratable(instance.primary_node,
7029                                                    instance)
7030         if result.fail_msg and self.fallback:
7031           self.lu.LogInfo("Can't migrate, instance offline, fallback to"
7032                           " failover")
7033           self.failover = True
7034         else:
7035           result.Raise("Can't migrate, please use failover",
7036                        prereq=True, ecode=errors.ECODE_STATE)
7037
7038     assert not (self.failover and self.cleanup)
7039
7040     if not self.failover:
7041       if self.lu.op.live is not None and self.lu.op.mode is not None:
7042         raise errors.OpPrereqError("Only one of the 'live' and 'mode'"
7043                                    " parameters are accepted",
7044                                    errors.ECODE_INVAL)
7045       if self.lu.op.live is not None:
7046         if self.lu.op.live:
7047           self.lu.op.mode = constants.HT_MIGRATION_LIVE
7048         else:
7049           self.lu.op.mode = constants.HT_MIGRATION_NONLIVE
7050         # reset the 'live' parameter to None so that repeated
7051         # invocations of CheckPrereq do not raise an exception
7052         self.lu.op.live = None
7053       elif self.lu.op.mode is None:
7054         # read the default value from the hypervisor
7055         i_hv = self.cfg.GetClusterInfo().FillHV(self.instance,
7056                                                 skip_globals=False)
7057         self.lu.op.mode = i_hv[constants.HV_MIGRATION_MODE]
7058
7059       self.live = self.lu.op.mode == constants.HT_MIGRATION_LIVE
7060     else:
7061       # Failover is never live
7062       self.live = False
7063
7064   def _RunAllocator(self):
7065     """Run the allocator based on input opcode.
7066
7067     """
7068     ial = IAllocator(self.cfg, self.rpc,
7069                      mode=constants.IALLOCATOR_MODE_RELOC,
7070                      name=self.instance_name,
7071                      # TODO See why hail breaks with a single node below
7072                      relocate_from=[self.instance.primary_node,
7073                                     self.instance.primary_node],
7074                      )
7075
7076     ial.Run(self.lu.op.iallocator)
7077
7078     if not ial.success:
7079       raise errors.OpPrereqError("Can't compute nodes using"
7080                                  " iallocator '%s': %s" %
7081                                  (self.lu.op.iallocator, ial.info),
7082                                  errors.ECODE_NORES)
7083     if len(ial.result) != ial.required_nodes:
7084       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
7085                                  " of nodes (%s), required %s" %
7086                                  (self.lu.op.iallocator, len(ial.result),
7087                                   ial.required_nodes), errors.ECODE_FAULT)
7088     self.target_node = ial.result[0]
7089     self.lu.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
7090                  self.instance_name, self.lu.op.iallocator,
7091                  utils.CommaJoin(ial.result))
7092
7093   def _WaitUntilSync(self):
7094     """Poll with custom rpc for disk sync.
7095
7096     This uses our own step-based rpc call.
7097
7098     """
7099     self.feedback_fn("* wait until resync is done")
7100     all_done = False
7101     while not all_done:
7102       all_done = True
7103       result = self.rpc.call_drbd_wait_sync(self.all_nodes,
7104                                             self.nodes_ip,
7105                                             self.instance.disks)
7106       min_percent = 100
7107       for node, nres in result.items():
7108         nres.Raise("Cannot resync disks on node %s" % node)
7109         node_done, node_percent = nres.payload
7110         all_done = all_done and node_done
7111         if node_percent is not None:
7112           min_percent = min(min_percent, node_percent)
7113       if not all_done:
7114         if min_percent < 100:
7115           self.feedback_fn("   - progress: %.1f%%" % min_percent)
7116         time.sleep(2)
7117
7118   def _EnsureSecondary(self, node):
7119     """Demote a node to secondary.
7120
7121     """
7122     self.feedback_fn("* switching node %s to secondary mode" % node)
7123
7124     for dev in self.instance.disks:
7125       self.cfg.SetDiskID(dev, node)
7126
7127     result = self.rpc.call_blockdev_close(node, self.instance.name,
7128                                           self.instance.disks)
7129     result.Raise("Cannot change disk to secondary on node %s" % node)
7130
7131   def _GoStandalone(self):
7132     """Disconnect from the network.
7133
7134     """
7135     self.feedback_fn("* changing into standalone mode")
7136     result = self.rpc.call_drbd_disconnect_net(self.all_nodes, self.nodes_ip,
7137                                                self.instance.disks)
7138     for node, nres in result.items():
7139       nres.Raise("Cannot disconnect disks node %s" % node)
7140
7141   def _GoReconnect(self, multimaster):
7142     """Reconnect to the network.
7143
7144     """
7145     if multimaster:
7146       msg = "dual-master"
7147     else:
7148       msg = "single-master"
7149     self.feedback_fn("* changing disks into %s mode" % msg)
7150     result = self.rpc.call_drbd_attach_net(self.all_nodes, self.nodes_ip,
7151                                            self.instance.disks,
7152                                            self.instance.name, multimaster)
7153     for node, nres in result.items():
7154       nres.Raise("Cannot change disks config on node %s" % node)
7155
7156   def _ExecCleanup(self):
7157     """Try to cleanup after a failed migration.
7158
7159     The cleanup is done by:
7160       - check that the instance is running only on one node
7161         (and update the config if needed)
7162       - change disks on its secondary node to secondary
7163       - wait until disks are fully synchronized
7164       - disconnect from the network
7165       - change disks into single-master mode
7166       - wait again until disks are fully synchronized
7167
7168     """
7169     instance = self.instance
7170     target_node = self.target_node
7171     source_node = self.source_node
7172
7173     # check running on only one node
7174     self.feedback_fn("* checking where the instance actually runs"
7175                      " (if this hangs, the hypervisor might be in"
7176                      " a bad state)")
7177     ins_l = self.rpc.call_instance_list(self.all_nodes, [instance.hypervisor])
7178     for node, result in ins_l.items():
7179       result.Raise("Can't contact node %s" % node)
7180
7181     runningon_source = instance.name in ins_l[source_node].payload
7182     runningon_target = instance.name in ins_l[target_node].payload
7183
7184     if runningon_source and runningon_target:
7185       raise errors.OpExecError("Instance seems to be running on two nodes,"
7186                                " or the hypervisor is confused; you will have"
7187                                " to ensure manually that it runs only on one"
7188                                " and restart this operation")
7189
7190     if not (runningon_source or runningon_target):
7191       raise errors.OpExecError("Instance does not seem to be running at all;"
7192                                " in this case it's safer to repair by"
7193                                " running 'gnt-instance stop' to ensure disk"
7194                                " shutdown, and then restarting it")
7195
7196     if runningon_target:
7197       # the migration has actually succeeded, we need to update the config
7198       self.feedback_fn("* instance running on secondary node (%s),"
7199                        " updating config" % target_node)
7200       instance.primary_node = target_node
7201       self.cfg.Update(instance, self.feedback_fn)
7202       demoted_node = source_node
7203     else:
7204       self.feedback_fn("* instance confirmed to be running on its"
7205                        " primary node (%s)" % source_node)
7206       demoted_node = target_node
7207
7208     if instance.disk_template in constants.DTS_INT_MIRROR:
7209       self._EnsureSecondary(demoted_node)
7210       try:
7211         self._WaitUntilSync()
7212       except errors.OpExecError:
7213         # we ignore here errors, since if the device is standalone, it
7214         # won't be able to sync
7215         pass
7216       self._GoStandalone()
7217       self._GoReconnect(False)
7218       self._WaitUntilSync()
7219
7220     self.feedback_fn("* done")
7221
7222   def _RevertDiskStatus(self):
7223     """Try to revert the disk status after a failed migration.
7224
7225     """
7226     target_node = self.target_node
7227     if self.instance.disk_template in constants.DTS_EXT_MIRROR:
7228       return
7229
7230     try:
7231       self._EnsureSecondary(target_node)
7232       self._GoStandalone()
7233       self._GoReconnect(False)
7234       self._WaitUntilSync()
7235     except errors.OpExecError, err:
7236       self.lu.LogWarning("Migration failed and I can't reconnect the drives,"
7237                          " please try to recover the instance manually;"
7238                          " error '%s'" % str(err))
7239
7240   def _AbortMigration(self):
7241     """Call the hypervisor code to abort a started migration.
7242
7243     """
7244     instance = self.instance
7245     target_node = self.target_node
7246     migration_info = self.migration_info
7247
7248     abort_result = self.rpc.call_finalize_migration(target_node,
7249                                                     instance,
7250                                                     migration_info,
7251                                                     False)
7252     abort_msg = abort_result.fail_msg
7253     if abort_msg:
7254       logging.error("Aborting migration failed on target node %s: %s",
7255                     target_node, abort_msg)
7256       # Don't raise an exception here, as we stil have to try to revert the
7257       # disk status, even if this step failed.
7258
7259   def _ExecMigration(self):
7260     """Migrate an instance.
7261
7262     The migrate is done by:
7263       - change the disks into dual-master mode
7264       - wait until disks are fully synchronized again
7265       - migrate the instance
7266       - change disks on the new secondary node (the old primary) to secondary
7267       - wait until disks are fully synchronized
7268       - change disks into single-master mode
7269
7270     """
7271     instance = self.instance
7272     target_node = self.target_node
7273     source_node = self.source_node
7274
7275     self.feedback_fn("* checking disk consistency between source and target")
7276     for dev in instance.disks:
7277       if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7278         raise errors.OpExecError("Disk %s is degraded or not fully"
7279                                  " synchronized on target node,"
7280                                  " aborting migration" % dev.iv_name)
7281
7282     # First get the migration information from the remote node
7283     result = self.rpc.call_migration_info(source_node, instance)
7284     msg = result.fail_msg
7285     if msg:
7286       log_err = ("Failed fetching source migration information from %s: %s" %
7287                  (source_node, msg))
7288       logging.error(log_err)
7289       raise errors.OpExecError(log_err)
7290
7291     self.migration_info = migration_info = result.payload
7292
7293     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7294       # Then switch the disks to master/master mode
7295       self._EnsureSecondary(target_node)
7296       self._GoStandalone()
7297       self._GoReconnect(True)
7298       self._WaitUntilSync()
7299
7300     self.feedback_fn("* preparing %s to accept the instance" % target_node)
7301     result = self.rpc.call_accept_instance(target_node,
7302                                            instance,
7303                                            migration_info,
7304                                            self.nodes_ip[target_node])
7305
7306     msg = result.fail_msg
7307     if msg:
7308       logging.error("Instance pre-migration failed, trying to revert"
7309                     " disk status: %s", msg)
7310       self.feedback_fn("Pre-migration failed, aborting")
7311       self._AbortMigration()
7312       self._RevertDiskStatus()
7313       raise errors.OpExecError("Could not pre-migrate instance %s: %s" %
7314                                (instance.name, msg))
7315
7316     self.feedback_fn("* migrating instance to %s" % target_node)
7317     result = self.rpc.call_instance_migrate(source_node, instance,
7318                                             self.nodes_ip[target_node],
7319                                             self.live)
7320     msg = result.fail_msg
7321     if msg:
7322       logging.error("Instance migration failed, trying to revert"
7323                     " disk status: %s", msg)
7324       self.feedback_fn("Migration failed, aborting")
7325       self._AbortMigration()
7326       self._RevertDiskStatus()
7327       raise errors.OpExecError("Could not migrate instance %s: %s" %
7328                                (instance.name, msg))
7329
7330     instance.primary_node = target_node
7331     # distribute new instance config to the other nodes
7332     self.cfg.Update(instance, self.feedback_fn)
7333
7334     result = self.rpc.call_finalize_migration(target_node,
7335                                               instance,
7336                                               migration_info,
7337                                               True)
7338     msg = result.fail_msg
7339     if msg:
7340       logging.error("Instance migration succeeded, but finalization failed:"
7341                     " %s", msg)
7342       raise errors.OpExecError("Could not finalize instance migration: %s" %
7343                                msg)
7344
7345     if self.instance.disk_template not in constants.DTS_EXT_MIRROR:
7346       self._EnsureSecondary(source_node)
7347       self._WaitUntilSync()
7348       self._GoStandalone()
7349       self._GoReconnect(False)
7350       self._WaitUntilSync()
7351
7352     self.feedback_fn("* done")
7353
7354   def _ExecFailover(self):
7355     """Failover an instance.
7356
7357     The failover is done by shutting it down on its present node and
7358     starting it on the secondary.
7359
7360     """
7361     instance = self.instance
7362     primary_node = self.cfg.GetNodeInfo(instance.primary_node)
7363
7364     source_node = instance.primary_node
7365     target_node = self.target_node
7366
7367     if instance.admin_up:
7368       self.feedback_fn("* checking disk consistency between source and target")
7369       for dev in instance.disks:
7370         # for drbd, these are drbd over lvm
7371         if not _CheckDiskConsistency(self.lu, dev, target_node, False):
7372           if primary_node.offline:
7373             self.feedback_fn("Node %s is offline, ignoring degraded disk %s on"
7374                              " target node %s" %
7375                              (primary_node.name, dev.iv_name, target_node))
7376           elif not self.ignore_consistency:
7377             raise errors.OpExecError("Disk %s is degraded on target node,"
7378                                      " aborting failover" % dev.iv_name)
7379     else:
7380       self.feedback_fn("* not checking disk consistency as instance is not"
7381                        " running")
7382
7383     self.feedback_fn("* shutting down instance on source node")
7384     logging.info("Shutting down instance %s on node %s",
7385                  instance.name, source_node)
7386
7387     result = self.rpc.call_instance_shutdown(source_node, instance,
7388                                              self.shutdown_timeout)
7389     msg = result.fail_msg
7390     if msg:
7391       if self.ignore_consistency or primary_node.offline:
7392         self.lu.LogWarning("Could not shutdown instance %s on node %s,"
7393                            " proceeding anyway; please make sure node"
7394                            " %s is down; error details: %s",
7395                            instance.name, source_node, source_node, msg)
7396       else:
7397         raise errors.OpExecError("Could not shutdown instance %s on"
7398                                  " node %s: %s" %
7399                                  (instance.name, source_node, msg))
7400
7401     self.feedback_fn("* deactivating the instance's disks on source node")
7402     if not _ShutdownInstanceDisks(self.lu, instance, ignore_primary=True):
7403       raise errors.OpExecError("Can't shut down the instance's disks")
7404
7405     instance.primary_node = target_node
7406     # distribute new instance config to the other nodes
7407     self.cfg.Update(instance, self.feedback_fn)
7408
7409     # Only start the instance if it's marked as up
7410     if instance.admin_up:
7411       self.feedback_fn("* activating the instance's disks on target node %s" %
7412                        target_node)
7413       logging.info("Starting instance %s on node %s",
7414                    instance.name, target_node)
7415
7416       disks_ok, _ = _AssembleInstanceDisks(self.lu, instance,
7417                                            ignore_secondaries=True)
7418       if not disks_ok:
7419         _ShutdownInstanceDisks(self.lu, instance)
7420         raise errors.OpExecError("Can't activate the instance's disks")
7421
7422       self.feedback_fn("* starting the instance on the target node %s" %
7423                        target_node)
7424       result = self.rpc.call_instance_start(target_node, instance, None, None,
7425                                             False)
7426       msg = result.fail_msg
7427       if msg:
7428         _ShutdownInstanceDisks(self.lu, instance)
7429         raise errors.OpExecError("Could not start instance %s on node %s: %s" %
7430                                  (instance.name, target_node, msg))
7431
7432   def Exec(self, feedback_fn):
7433     """Perform the migration.
7434
7435     """
7436     self.feedback_fn = feedback_fn
7437     self.source_node = self.instance.primary_node
7438
7439     # FIXME: if we implement migrate-to-any in DRBD, this needs fixing
7440     if self.instance.disk_template in constants.DTS_INT_MIRROR:
7441       self.target_node = self.instance.secondary_nodes[0]
7442       # Otherwise self.target_node has been populated either
7443       # directly, or through an iallocator.
7444
7445     self.all_nodes = [self.source_node, self.target_node]
7446     self.nodes_ip = dict((name, node.secondary_ip) for (name, node)
7447                          in self.cfg.GetMultiNodeInfo(self.all_nodes))
7448
7449     if self.failover:
7450       feedback_fn("Failover instance %s" % self.instance.name)
7451       self._ExecFailover()
7452     else:
7453       feedback_fn("Migrating instance %s" % self.instance.name)
7454
7455       if self.cleanup:
7456         return self._ExecCleanup()
7457       else:
7458         return self._ExecMigration()
7459
7460
7461 def _CreateBlockDev(lu, node, instance, device, force_create,
7462                     info, force_open):
7463   """Create a tree of block devices on a given node.
7464
7465   If this device type has to be created on secondaries, create it and
7466   all its children.
7467
7468   If not, just recurse to children keeping the same 'force' value.
7469
7470   @param lu: the lu on whose behalf we execute
7471   @param node: the node on which to create the device
7472   @type instance: L{objects.Instance}
7473   @param instance: the instance which owns the device
7474   @type device: L{objects.Disk}
7475   @param device: the device to create
7476   @type force_create: boolean
7477   @param force_create: whether to force creation of this device; this
7478       will be change to True whenever we find a device which has
7479       CreateOnSecondary() attribute
7480   @param info: the extra 'metadata' we should attach to the device
7481       (this will be represented as a LVM tag)
7482   @type force_open: boolean
7483   @param force_open: this parameter will be passes to the
7484       L{backend.BlockdevCreate} function where it specifies
7485       whether we run on primary or not, and it affects both
7486       the child assembly and the device own Open() execution
7487
7488   """
7489   if device.CreateOnSecondary():
7490     force_create = True
7491
7492   if device.children:
7493     for child in device.children:
7494       _CreateBlockDev(lu, node, instance, child, force_create,
7495                       info, force_open)
7496
7497   if not force_create:
7498     return
7499
7500   _CreateSingleBlockDev(lu, node, instance, device, info, force_open)
7501
7502
7503 def _CreateSingleBlockDev(lu, node, instance, device, info, force_open):
7504   """Create a single block device on a given node.
7505
7506   This will not recurse over children of the device, so they must be
7507   created in advance.
7508
7509   @param lu: the lu on whose behalf we execute
7510   @param node: the node on which to create the device
7511   @type instance: L{objects.Instance}
7512   @param instance: the instance which owns the device
7513   @type device: L{objects.Disk}
7514   @param device: the device to create
7515   @param info: the extra 'metadata' we should attach to the device
7516       (this will be represented as a LVM tag)
7517   @type force_open: boolean
7518   @param force_open: this parameter will be passes to the
7519       L{backend.BlockdevCreate} function where it specifies
7520       whether we run on primary or not, and it affects both
7521       the child assembly and the device own Open() execution
7522
7523   """
7524   lu.cfg.SetDiskID(device, node)
7525   result = lu.rpc.call_blockdev_create(node, device, device.size,
7526                                        instance.name, force_open, info)
7527   result.Raise("Can't create block device %s on"
7528                " node %s for instance %s" % (device, node, instance.name))
7529   if device.physical_id is None:
7530     device.physical_id = result.payload
7531
7532
7533 def _GenerateUniqueNames(lu, exts):
7534   """Generate a suitable LV name.
7535
7536   This will generate a logical volume name for the given instance.
7537
7538   """
7539   results = []
7540   for val in exts:
7541     new_id = lu.cfg.GenerateUniqueID(lu.proc.GetECId())
7542     results.append("%s%s" % (new_id, val))
7543   return results
7544
7545
7546 def _GenerateDRBD8Branch(lu, primary, secondary, size, vgnames, names,
7547                          iv_name, p_minor, s_minor):
7548   """Generate a drbd8 device complete with its children.
7549
7550   """
7551   assert len(vgnames) == len(names) == 2
7552   port = lu.cfg.AllocatePort()
7553   shared_secret = lu.cfg.GenerateDRBDSecret(lu.proc.GetECId())
7554   dev_data = objects.Disk(dev_type=constants.LD_LV, size=size,
7555                           logical_id=(vgnames[0], names[0]))
7556   dev_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
7557                           logical_id=(vgnames[1], names[1]))
7558   drbd_dev = objects.Disk(dev_type=constants.LD_DRBD8, size=size,
7559                           logical_id=(primary, secondary, port,
7560                                       p_minor, s_minor,
7561                                       shared_secret),
7562                           children=[dev_data, dev_meta],
7563                           iv_name=iv_name)
7564   return drbd_dev
7565
7566
7567 def _GenerateDiskTemplate(lu, template_name,
7568                           instance_name, primary_node,
7569                           secondary_nodes, disk_info,
7570                           file_storage_dir, file_driver,
7571                           base_index, feedback_fn):
7572   """Generate the entire disk layout for a given template type.
7573
7574   """
7575   #TODO: compute space requirements
7576
7577   vgname = lu.cfg.GetVGName()
7578   disk_count = len(disk_info)
7579   disks = []
7580   if template_name == constants.DT_DISKLESS:
7581     pass
7582   elif template_name == constants.DT_PLAIN:
7583     if len(secondary_nodes) != 0:
7584       raise errors.ProgrammerError("Wrong template configuration")
7585
7586     names = _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7587                                       for i in range(disk_count)])
7588     for idx, disk in enumerate(disk_info):
7589       disk_index = idx + base_index
7590       vg = disk.get(constants.IDISK_VG, vgname)
7591       feedback_fn("* disk %i, vg %s, name %s" % (idx, vg, names[idx]))
7592       disk_dev = objects.Disk(dev_type=constants.LD_LV,
7593                               size=disk[constants.IDISK_SIZE],
7594                               logical_id=(vg, names[idx]),
7595                               iv_name="disk/%d" % disk_index,
7596                               mode=disk[constants.IDISK_MODE])
7597       disks.append(disk_dev)
7598   elif template_name == constants.DT_DRBD8:
7599     if len(secondary_nodes) != 1:
7600       raise errors.ProgrammerError("Wrong template configuration")
7601     remote_node = secondary_nodes[0]
7602     minors = lu.cfg.AllocateDRBDMinor(
7603       [primary_node, remote_node] * len(disk_info), instance_name)
7604
7605     names = []
7606     for lv_prefix in _GenerateUniqueNames(lu, [".disk%d" % (base_index + i)
7607                                                for i in range(disk_count)]):
7608       names.append(lv_prefix + "_data")
7609       names.append(lv_prefix + "_meta")
7610     for idx, disk in enumerate(disk_info):
7611       disk_index = idx + base_index
7612       data_vg = disk.get(constants.IDISK_VG, vgname)
7613       meta_vg = disk.get(constants.IDISK_METAVG, data_vg)
7614       disk_dev = _GenerateDRBD8Branch(lu, primary_node, remote_node,
7615                                       disk[constants.IDISK_SIZE],
7616                                       [data_vg, meta_vg],
7617                                       names[idx * 2:idx * 2 + 2],
7618                                       "disk/%d" % disk_index,
7619                                       minors[idx * 2], minors[idx * 2 + 1])
7620       disk_dev.mode = disk[constants.IDISK_MODE]
7621       disks.append(disk_dev)
7622   elif template_name == constants.DT_FILE:
7623     if len(secondary_nodes) != 0:
7624       raise errors.ProgrammerError("Wrong template configuration")
7625
7626     opcodes.RequireFileStorage()
7627
7628     for idx, disk in enumerate(disk_info):
7629       disk_index = idx + base_index
7630       disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7631                               size=disk[constants.IDISK_SIZE],
7632                               iv_name="disk/%d" % disk_index,
7633                               logical_id=(file_driver,
7634                                           "%s/disk%d" % (file_storage_dir,
7635                                                          disk_index)),
7636                               mode=disk[constants.IDISK_MODE])
7637       disks.append(disk_dev)
7638   elif template_name == constants.DT_SHARED_FILE:
7639     if len(secondary_nodes) != 0:
7640       raise errors.ProgrammerError("Wrong template configuration")
7641
7642     opcodes.RequireSharedFileStorage()
7643
7644     for idx, disk in enumerate(disk_info):
7645       disk_index = idx + base_index
7646       disk_dev = objects.Disk(dev_type=constants.LD_FILE,
7647                               size=disk[constants.IDISK_SIZE],
7648                               iv_name="disk/%d" % disk_index,
7649                               logical_id=(file_driver,
7650                                           "%s/disk%d" % (file_storage_dir,
7651                                                          disk_index)),
7652                               mode=disk[constants.IDISK_MODE])
7653       disks.append(disk_dev)
7654   elif template_name == constants.DT_BLOCK:
7655     if len(secondary_nodes) != 0:
7656       raise errors.ProgrammerError("Wrong template configuration")
7657
7658     for idx, disk in enumerate(disk_info):
7659       disk_index = idx + base_index
7660       disk_dev = objects.Disk(dev_type=constants.LD_BLOCKDEV,
7661                               size=disk[constants.IDISK_SIZE],
7662                               logical_id=(constants.BLOCKDEV_DRIVER_MANUAL,
7663                                           disk[constants.IDISK_ADOPT]),
7664                               iv_name="disk/%d" % disk_index,
7665                               mode=disk[constants.IDISK_MODE])
7666       disks.append(disk_dev)
7667
7668   else:
7669     raise errors.ProgrammerError("Invalid disk template '%s'" % template_name)
7670   return disks
7671
7672
7673 def _GetInstanceInfoText(instance):
7674   """Compute that text that should be added to the disk's metadata.
7675
7676   """
7677   return "originstname+%s" % instance.name
7678
7679
7680 def _CalcEta(time_taken, written, total_size):
7681   """Calculates the ETA based on size written and total size.
7682
7683   @param time_taken: The time taken so far
7684   @param written: amount written so far
7685   @param total_size: The total size of data to be written
7686   @return: The remaining time in seconds
7687
7688   """
7689   avg_time = time_taken / float(written)
7690   return (total_size - written) * avg_time
7691
7692
7693 def _WipeDisks(lu, instance):
7694   """Wipes instance disks.
7695
7696   @type lu: L{LogicalUnit}
7697   @param lu: the logical unit on whose behalf we execute
7698   @type instance: L{objects.Instance}
7699   @param instance: the instance whose disks we should create
7700   @return: the success of the wipe
7701
7702   """
7703   node = instance.primary_node
7704
7705   for device in instance.disks:
7706     lu.cfg.SetDiskID(device, node)
7707
7708   logging.info("Pause sync of instance %s disks", instance.name)
7709   result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, True)
7710
7711   for idx, success in enumerate(result.payload):
7712     if not success:
7713       logging.warn("pause-sync of instance %s for disks %d failed",
7714                    instance.name, idx)
7715
7716   try:
7717     for idx, device in enumerate(instance.disks):
7718       # The wipe size is MIN_WIPE_CHUNK_PERCENT % of the instance disk but
7719       # MAX_WIPE_CHUNK at max
7720       wipe_chunk_size = min(constants.MAX_WIPE_CHUNK, device.size / 100.0 *
7721                             constants.MIN_WIPE_CHUNK_PERCENT)
7722       # we _must_ make this an int, otherwise rounding errors will
7723       # occur
7724       wipe_chunk_size = int(wipe_chunk_size)
7725
7726       lu.LogInfo("* Wiping disk %d", idx)
7727       logging.info("Wiping disk %d for instance %s, node %s using"
7728                    " chunk size %s", idx, instance.name, node, wipe_chunk_size)
7729
7730       offset = 0
7731       size = device.size
7732       last_output = 0
7733       start_time = time.time()
7734
7735       while offset < size:
7736         wipe_size = min(wipe_chunk_size, size - offset)
7737         logging.debug("Wiping disk %d, offset %s, chunk %s",
7738                       idx, offset, wipe_size)
7739         result = lu.rpc.call_blockdev_wipe(node, device, offset, wipe_size)
7740         result.Raise("Could not wipe disk %d at offset %d for size %d" %
7741                      (idx, offset, wipe_size))
7742         now = time.time()
7743         offset += wipe_size
7744         if now - last_output >= 60:
7745           eta = _CalcEta(now - start_time, offset, size)
7746           lu.LogInfo(" - done: %.1f%% ETA: %s" %
7747                      (offset / float(size) * 100, utils.FormatSeconds(eta)))
7748           last_output = now
7749   finally:
7750     logging.info("Resume sync of instance %s disks", instance.name)
7751
7752     result = lu.rpc.call_blockdev_pause_resume_sync(node, instance.disks, False)
7753
7754     for idx, success in enumerate(result.payload):
7755       if not success:
7756         lu.LogWarning("Resume sync of disk %d failed, please have a"
7757                       " look at the status and troubleshoot the issue", idx)
7758         logging.warn("resume-sync of instance %s for disks %d failed",
7759                      instance.name, idx)
7760
7761
7762 def _CreateDisks(lu, instance, to_skip=None, target_node=None):
7763   """Create all disks for an instance.
7764
7765   This abstracts away some work from AddInstance.
7766
7767   @type lu: L{LogicalUnit}
7768   @param lu: the logical unit on whose behalf we execute
7769   @type instance: L{objects.Instance}
7770   @param instance: the instance whose disks we should create
7771   @type to_skip: list
7772   @param to_skip: list of indices to skip
7773   @type target_node: string
7774   @param target_node: if passed, overrides the target node for creation
7775   @rtype: boolean
7776   @return: the success of the creation
7777
7778   """
7779   info = _GetInstanceInfoText(instance)
7780   if target_node is None:
7781     pnode = instance.primary_node
7782     all_nodes = instance.all_nodes
7783   else:
7784     pnode = target_node
7785     all_nodes = [pnode]
7786
7787   if instance.disk_template in constants.DTS_FILEBASED:
7788     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7789     result = lu.rpc.call_file_storage_dir_create(pnode, file_storage_dir)
7790
7791     result.Raise("Failed to create directory '%s' on"
7792                  " node %s" % (file_storage_dir, pnode))
7793
7794   # Note: this needs to be kept in sync with adding of disks in
7795   # LUInstanceSetParams
7796   for idx, device in enumerate(instance.disks):
7797     if to_skip and idx in to_skip:
7798       continue
7799     logging.info("Creating volume %s for instance %s",
7800                  device.iv_name, instance.name)
7801     #HARDCODE
7802     for node in all_nodes:
7803       f_create = node == pnode
7804       _CreateBlockDev(lu, node, instance, device, f_create, info, f_create)
7805
7806
7807 def _RemoveDisks(lu, instance, target_node=None):
7808   """Remove all disks for an instance.
7809
7810   This abstracts away some work from `AddInstance()` and
7811   `RemoveInstance()`. Note that in case some of the devices couldn't
7812   be removed, the removal will continue with the other ones (compare
7813   with `_CreateDisks()`).
7814
7815   @type lu: L{LogicalUnit}
7816   @param lu: the logical unit on whose behalf we execute
7817   @type instance: L{objects.Instance}
7818   @param instance: the instance whose disks we should remove
7819   @type target_node: string
7820   @param target_node: used to override the node on which to remove the disks
7821   @rtype: boolean
7822   @return: the success of the removal
7823
7824   """
7825   logging.info("Removing block devices for instance %s", instance.name)
7826
7827   all_result = True
7828   for device in instance.disks:
7829     if target_node:
7830       edata = [(target_node, device)]
7831     else:
7832       edata = device.ComputeNodeTree(instance.primary_node)
7833     for node, disk in edata:
7834       lu.cfg.SetDiskID(disk, node)
7835       msg = lu.rpc.call_blockdev_remove(node, disk).fail_msg
7836       if msg:
7837         lu.LogWarning("Could not remove block device %s on node %s,"
7838                       " continuing anyway: %s", device.iv_name, node, msg)
7839         all_result = False
7840
7841   if instance.disk_template == constants.DT_FILE:
7842     file_storage_dir = os.path.dirname(instance.disks[0].logical_id[1])
7843     if target_node:
7844       tgt = target_node
7845     else:
7846       tgt = instance.primary_node
7847     result = lu.rpc.call_file_storage_dir_remove(tgt, file_storage_dir)
7848     if result.fail_msg:
7849       lu.LogWarning("Could not remove directory '%s' on node %s: %s",
7850                     file_storage_dir, instance.primary_node, result.fail_msg)
7851       all_result = False
7852
7853   return all_result
7854
7855
7856 def _ComputeDiskSizePerVG(disk_template, disks):
7857   """Compute disk size requirements in the volume group
7858
7859   """
7860   def _compute(disks, payload):
7861     """Universal algorithm.
7862
7863     """
7864     vgs = {}
7865     for disk in disks:
7866       vgs[disk[constants.IDISK_VG]] = \
7867         vgs.get(constants.IDISK_VG, 0) + disk[constants.IDISK_SIZE] + payload
7868
7869     return vgs
7870
7871   # Required free disk space as a function of disk and swap space
7872   req_size_dict = {
7873     constants.DT_DISKLESS: {},
7874     constants.DT_PLAIN: _compute(disks, 0),
7875     # 128 MB are added for drbd metadata for each disk
7876     constants.DT_DRBD8: _compute(disks, 128),
7877     constants.DT_FILE: {},
7878     constants.DT_SHARED_FILE: {},
7879   }
7880
7881   if disk_template not in req_size_dict:
7882     raise errors.ProgrammerError("Disk template '%s' size requirement"
7883                                  " is unknown" %  disk_template)
7884
7885   return req_size_dict[disk_template]
7886
7887
7888 def _ComputeDiskSize(disk_template, disks):
7889   """Compute disk size requirements in the volume group
7890
7891   """
7892   # Required free disk space as a function of disk and swap space
7893   req_size_dict = {
7894     constants.DT_DISKLESS: None,
7895     constants.DT_PLAIN: sum(d[constants.IDISK_SIZE] for d in disks),
7896     # 128 MB are added for drbd metadata for each disk
7897     constants.DT_DRBD8: sum(d[constants.IDISK_SIZE] + 128 for d in disks),
7898     constants.DT_FILE: None,
7899     constants.DT_SHARED_FILE: 0,
7900     constants.DT_BLOCK: 0,
7901   }
7902
7903   if disk_template not in req_size_dict:
7904     raise errors.ProgrammerError("Disk template '%s' size requirement"
7905                                  " is unknown" %  disk_template)
7906
7907   return req_size_dict[disk_template]
7908
7909
7910 def _FilterVmNodes(lu, nodenames):
7911   """Filters out non-vm_capable nodes from a list.
7912
7913   @type lu: L{LogicalUnit}
7914   @param lu: the logical unit for which we check
7915   @type nodenames: list
7916   @param nodenames: the list of nodes on which we should check
7917   @rtype: list
7918   @return: the list of vm-capable nodes
7919
7920   """
7921   vm_nodes = frozenset(lu.cfg.GetNonVmCapableNodeList())
7922   return [name for name in nodenames if name not in vm_nodes]
7923
7924
7925 def _CheckHVParams(lu, nodenames, hvname, hvparams):
7926   """Hypervisor parameter validation.
7927
7928   This function abstract the hypervisor parameter validation to be
7929   used in both instance create and instance modify.
7930
7931   @type lu: L{LogicalUnit}
7932   @param lu: the logical unit for which we check
7933   @type nodenames: list
7934   @param nodenames: the list of nodes on which we should check
7935   @type hvname: string
7936   @param hvname: the name of the hypervisor we should use
7937   @type hvparams: dict
7938   @param hvparams: the parameters which we need to check
7939   @raise errors.OpPrereqError: if the parameters are not valid
7940
7941   """
7942   nodenames = _FilterVmNodes(lu, nodenames)
7943   hvinfo = lu.rpc.call_hypervisor_validate_params(nodenames,
7944                                                   hvname,
7945                                                   hvparams)
7946   for node in nodenames:
7947     info = hvinfo[node]
7948     if info.offline:
7949       continue
7950     info.Raise("Hypervisor parameter validation failed on node %s" % node)
7951
7952
7953 def _CheckOSParams(lu, required, nodenames, osname, osparams):
7954   """OS parameters validation.
7955
7956   @type lu: L{LogicalUnit}
7957   @param lu: the logical unit for which we check
7958   @type required: boolean
7959   @param required: whether the validation should fail if the OS is not
7960       found
7961   @type nodenames: list
7962   @param nodenames: the list of nodes on which we should check
7963   @type osname: string
7964   @param osname: the name of the hypervisor we should use
7965   @type osparams: dict
7966   @param osparams: the parameters which we need to check
7967   @raise errors.OpPrereqError: if the parameters are not valid
7968
7969   """
7970   nodenames = _FilterVmNodes(lu, nodenames)
7971   result = lu.rpc.call_os_validate(required, nodenames, osname,
7972                                    [constants.OS_VALIDATE_PARAMETERS],
7973                                    osparams)
7974   for node, nres in result.items():
7975     # we don't check for offline cases since this should be run only
7976     # against the master node and/or an instance's nodes
7977     nres.Raise("OS Parameters validation failed on node %s" % node)
7978     if not nres.payload:
7979       lu.LogInfo("OS %s not found on node %s, validation skipped",
7980                  osname, node)
7981
7982
7983 class LUInstanceCreate(LogicalUnit):
7984   """Create an instance.
7985
7986   """
7987   HPATH = "instance-add"
7988   HTYPE = constants.HTYPE_INSTANCE
7989   REQ_BGL = False
7990
7991   def CheckArguments(self):
7992     """Check arguments.
7993
7994     """
7995     # do not require name_check to ease forward/backward compatibility
7996     # for tools
7997     if self.op.no_install and self.op.start:
7998       self.LogInfo("No-installation mode selected, disabling startup")
7999       self.op.start = False
8000     # validate/normalize the instance name
8001     self.op.instance_name = \
8002       netutils.Hostname.GetNormalizedName(self.op.instance_name)
8003
8004     if self.op.ip_check and not self.op.name_check:
8005       # TODO: make the ip check more flexible and not depend on the name check
8006       raise errors.OpPrereqError("Cannot do IP address check without a name"
8007                                  " check", errors.ECODE_INVAL)
8008
8009     # check nics' parameter names
8010     for nic in self.op.nics:
8011       utils.ForceDictType(nic, constants.INIC_PARAMS_TYPES)
8012
8013     # check disks. parameter names and consistent adopt/no-adopt strategy
8014     has_adopt = has_no_adopt = False
8015     for disk in self.op.disks:
8016       utils.ForceDictType(disk, constants.IDISK_PARAMS_TYPES)
8017       if constants.IDISK_ADOPT in disk:
8018         has_adopt = True
8019       else:
8020         has_no_adopt = True
8021     if has_adopt and has_no_adopt:
8022       raise errors.OpPrereqError("Either all disks are adopted or none is",
8023                                  errors.ECODE_INVAL)
8024     if has_adopt:
8025       if self.op.disk_template not in constants.DTS_MAY_ADOPT:
8026         raise errors.OpPrereqError("Disk adoption is not supported for the"
8027                                    " '%s' disk template" %
8028                                    self.op.disk_template,
8029                                    errors.ECODE_INVAL)
8030       if self.op.iallocator is not None:
8031         raise errors.OpPrereqError("Disk adoption not allowed with an"
8032                                    " iallocator script", errors.ECODE_INVAL)
8033       if self.op.mode == constants.INSTANCE_IMPORT:
8034         raise errors.OpPrereqError("Disk adoption not allowed for"
8035                                    " instance import", errors.ECODE_INVAL)
8036     else:
8037       if self.op.disk_template in constants.DTS_MUST_ADOPT:
8038         raise errors.OpPrereqError("Disk template %s requires disk adoption,"
8039                                    " but no 'adopt' parameter given" %
8040                                    self.op.disk_template,
8041                                    errors.ECODE_INVAL)
8042
8043     self.adopt_disks = has_adopt
8044
8045     # instance name verification
8046     if self.op.name_check:
8047       self.hostname1 = netutils.GetHostname(name=self.op.instance_name)
8048       self.op.instance_name = self.hostname1.name
8049       # used in CheckPrereq for ip ping check
8050       self.check_ip = self.hostname1.ip
8051     else:
8052       self.check_ip = None
8053
8054     # file storage checks
8055     if (self.op.file_driver and
8056         not self.op.file_driver in constants.FILE_DRIVER):
8057       raise errors.OpPrereqError("Invalid file driver name '%s'" %
8058                                  self.op.file_driver, errors.ECODE_INVAL)
8059
8060     if self.op.disk_template == constants.DT_FILE:
8061       opcodes.RequireFileStorage()
8062     elif self.op.disk_template == constants.DT_SHARED_FILE:
8063       opcodes.RequireSharedFileStorage()
8064
8065     ### Node/iallocator related checks
8066     _CheckIAllocatorOrNode(self, "iallocator", "pnode")
8067
8068     if self.op.pnode is not None:
8069       if self.op.disk_template in constants.DTS_INT_MIRROR:
8070         if self.op.snode is None:
8071           raise errors.OpPrereqError("The networked disk templates need"
8072                                      " a mirror node", errors.ECODE_INVAL)
8073       elif self.op.snode:
8074         self.LogWarning("Secondary node will be ignored on non-mirrored disk"
8075                         " template")
8076         self.op.snode = None
8077
8078     self._cds = _GetClusterDomainSecret()
8079
8080     if self.op.mode == constants.INSTANCE_IMPORT:
8081       # On import force_variant must be True, because if we forced it at
8082       # initial install, our only chance when importing it back is that it
8083       # works again!
8084       self.op.force_variant = True
8085
8086       if self.op.no_install:
8087         self.LogInfo("No-installation mode has no effect during import")
8088
8089     elif self.op.mode == constants.INSTANCE_CREATE:
8090       if self.op.os_type is None:
8091         raise errors.OpPrereqError("No guest OS specified",
8092                                    errors.ECODE_INVAL)
8093       if self.op.os_type in self.cfg.GetClusterInfo().blacklisted_os:
8094         raise errors.OpPrereqError("Guest OS '%s' is not allowed for"
8095                                    " installation" % self.op.os_type,
8096                                    errors.ECODE_STATE)
8097       if self.op.disk_template is None:
8098         raise errors.OpPrereqError("No disk template specified",
8099                                    errors.ECODE_INVAL)
8100
8101     elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8102       # Check handshake to ensure both clusters have the same domain secret
8103       src_handshake = self.op.source_handshake
8104       if not src_handshake:
8105         raise errors.OpPrereqError("Missing source handshake",
8106                                    errors.ECODE_INVAL)
8107
8108       errmsg = masterd.instance.CheckRemoteExportHandshake(self._cds,
8109                                                            src_handshake)
8110       if errmsg:
8111         raise errors.OpPrereqError("Invalid handshake: %s" % errmsg,
8112                                    errors.ECODE_INVAL)
8113
8114       # Load and check source CA
8115       self.source_x509_ca_pem = self.op.source_x509_ca
8116       if not self.source_x509_ca_pem:
8117         raise errors.OpPrereqError("Missing source X509 CA",
8118                                    errors.ECODE_INVAL)
8119
8120       try:
8121         (cert, _) = utils.LoadSignedX509Certificate(self.source_x509_ca_pem,
8122                                                     self._cds)
8123       except OpenSSL.crypto.Error, err:
8124         raise errors.OpPrereqError("Unable to load source X509 CA (%s)" %
8125                                    (err, ), errors.ECODE_INVAL)
8126
8127       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
8128       if errcode is not None:
8129         raise errors.OpPrereqError("Invalid source X509 CA (%s)" % (msg, ),
8130                                    errors.ECODE_INVAL)
8131
8132       self.source_x509_ca = cert
8133
8134       src_instance_name = self.op.source_instance_name
8135       if not src_instance_name:
8136         raise errors.OpPrereqError("Missing source instance name",
8137                                    errors.ECODE_INVAL)
8138
8139       self.source_instance_name = \
8140           netutils.GetHostname(name=src_instance_name).name
8141
8142     else:
8143       raise errors.OpPrereqError("Invalid instance creation mode %r" %
8144                                  self.op.mode, errors.ECODE_INVAL)
8145
8146   def ExpandNames(self):
8147     """ExpandNames for CreateInstance.
8148
8149     Figure out the right locks for instance creation.
8150
8151     """
8152     self.needed_locks = {}
8153
8154     instance_name = self.op.instance_name
8155     # this is just a preventive check, but someone might still add this
8156     # instance in the meantime, and creation will fail at lock-add time
8157     if instance_name in self.cfg.GetInstanceList():
8158       raise errors.OpPrereqError("Instance '%s' is already in the cluster" %
8159                                  instance_name, errors.ECODE_EXISTS)
8160
8161     self.add_locks[locking.LEVEL_INSTANCE] = instance_name
8162
8163     if self.op.iallocator:
8164       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8165     else:
8166       self.op.pnode = _ExpandNodeName(self.cfg, self.op.pnode)
8167       nodelist = [self.op.pnode]
8168       if self.op.snode is not None:
8169         self.op.snode = _ExpandNodeName(self.cfg, self.op.snode)
8170         nodelist.append(self.op.snode)
8171       self.needed_locks[locking.LEVEL_NODE] = nodelist
8172
8173     # in case of import lock the source node too
8174     if self.op.mode == constants.INSTANCE_IMPORT:
8175       src_node = self.op.src_node
8176       src_path = self.op.src_path
8177
8178       if src_path is None:
8179         self.op.src_path = src_path = self.op.instance_name
8180
8181       if src_node is None:
8182         self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
8183         self.op.src_node = None
8184         if os.path.isabs(src_path):
8185           raise errors.OpPrereqError("Importing an instance from an absolute"
8186                                      " path requires a source node option",
8187                                      errors.ECODE_INVAL)
8188       else:
8189         self.op.src_node = src_node = _ExpandNodeName(self.cfg, src_node)
8190         if self.needed_locks[locking.LEVEL_NODE] is not locking.ALL_SET:
8191           self.needed_locks[locking.LEVEL_NODE].append(src_node)
8192         if not os.path.isabs(src_path):
8193           self.op.src_path = src_path = \
8194             utils.PathJoin(constants.EXPORT_DIR, src_path)
8195
8196   def _RunAllocator(self):
8197     """Run the allocator based on input opcode.
8198
8199     """
8200     nics = [n.ToDict() for n in self.nics]
8201     ial = IAllocator(self.cfg, self.rpc,
8202                      mode=constants.IALLOCATOR_MODE_ALLOC,
8203                      name=self.op.instance_name,
8204                      disk_template=self.op.disk_template,
8205                      tags=self.op.tags,
8206                      os=self.op.os_type,
8207                      vcpus=self.be_full[constants.BE_VCPUS],
8208                      memory=self.be_full[constants.BE_MEMORY],
8209                      disks=self.disks,
8210                      nics=nics,
8211                      hypervisor=self.op.hypervisor,
8212                      )
8213
8214     ial.Run(self.op.iallocator)
8215
8216     if not ial.success:
8217       raise errors.OpPrereqError("Can't compute nodes using"
8218                                  " iallocator '%s': %s" %
8219                                  (self.op.iallocator, ial.info),
8220                                  errors.ECODE_NORES)
8221     if len(ial.result) != ial.required_nodes:
8222       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
8223                                  " of nodes (%s), required %s" %
8224                                  (self.op.iallocator, len(ial.result),
8225                                   ial.required_nodes), errors.ECODE_FAULT)
8226     self.op.pnode = ial.result[0]
8227     self.LogInfo("Selected nodes for instance %s via iallocator %s: %s",
8228                  self.op.instance_name, self.op.iallocator,
8229                  utils.CommaJoin(ial.result))
8230     if ial.required_nodes == 2:
8231       self.op.snode = ial.result[1]
8232
8233   def BuildHooksEnv(self):
8234     """Build hooks env.
8235
8236     This runs on master, primary and secondary nodes of the instance.
8237
8238     """
8239     env = {
8240       "ADD_MODE": self.op.mode,
8241       }
8242     if self.op.mode == constants.INSTANCE_IMPORT:
8243       env["SRC_NODE"] = self.op.src_node
8244       env["SRC_PATH"] = self.op.src_path
8245       env["SRC_IMAGES"] = self.src_images
8246
8247     env.update(_BuildInstanceHookEnv(
8248       name=self.op.instance_name,
8249       primary_node=self.op.pnode,
8250       secondary_nodes=self.secondaries,
8251       status=self.op.start,
8252       os_type=self.op.os_type,
8253       memory=self.be_full[constants.BE_MEMORY],
8254       vcpus=self.be_full[constants.BE_VCPUS],
8255       nics=_NICListToTuple(self, self.nics),
8256       disk_template=self.op.disk_template,
8257       disks=[(d[constants.IDISK_SIZE], d[constants.IDISK_MODE])
8258              for d in self.disks],
8259       bep=self.be_full,
8260       hvp=self.hv_full,
8261       hypervisor_name=self.op.hypervisor,
8262       tags=self.op.tags,
8263     ))
8264
8265     return env
8266
8267   def BuildHooksNodes(self):
8268     """Build hooks nodes.
8269
8270     """
8271     nl = [self.cfg.GetMasterNode(), self.op.pnode] + self.secondaries
8272     return nl, nl
8273
8274   def _ReadExportInfo(self):
8275     """Reads the export information from disk.
8276
8277     It will override the opcode source node and path with the actual
8278     information, if these two were not specified before.
8279
8280     @return: the export information
8281
8282     """
8283     assert self.op.mode == constants.INSTANCE_IMPORT
8284
8285     src_node = self.op.src_node
8286     src_path = self.op.src_path
8287
8288     if src_node is None:
8289       locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
8290       exp_list = self.rpc.call_export_list(locked_nodes)
8291       found = False
8292       for node in exp_list:
8293         if exp_list[node].fail_msg:
8294           continue
8295         if src_path in exp_list[node].payload:
8296           found = True
8297           self.op.src_node = src_node = node
8298           self.op.src_path = src_path = utils.PathJoin(constants.EXPORT_DIR,
8299                                                        src_path)
8300           break
8301       if not found:
8302         raise errors.OpPrereqError("No export found for relative path %s" %
8303                                     src_path, errors.ECODE_INVAL)
8304
8305     _CheckNodeOnline(self, src_node)
8306     result = self.rpc.call_export_info(src_node, src_path)
8307     result.Raise("No export or invalid export found in dir %s" % src_path)
8308
8309     export_info = objects.SerializableConfigParser.Loads(str(result.payload))
8310     if not export_info.has_section(constants.INISECT_EXP):
8311       raise errors.ProgrammerError("Corrupted export config",
8312                                    errors.ECODE_ENVIRON)
8313
8314     ei_version = export_info.get(constants.INISECT_EXP, "version")
8315     if (int(ei_version) != constants.EXPORT_VERSION):
8316       raise errors.OpPrereqError("Wrong export version %s (wanted %d)" %
8317                                  (ei_version, constants.EXPORT_VERSION),
8318                                  errors.ECODE_ENVIRON)
8319     return export_info
8320
8321   def _ReadExportParams(self, einfo):
8322     """Use export parameters as defaults.
8323
8324     In case the opcode doesn't specify (as in override) some instance
8325     parameters, then try to use them from the export information, if
8326     that declares them.
8327
8328     """
8329     self.op.os_type = einfo.get(constants.INISECT_EXP, "os")
8330
8331     if self.op.disk_template is None:
8332       if einfo.has_option(constants.INISECT_INS, "disk_template"):
8333         self.op.disk_template = einfo.get(constants.INISECT_INS,
8334                                           "disk_template")
8335       else:
8336         raise errors.OpPrereqError("No disk template specified and the export"
8337                                    " is missing the disk_template information",
8338                                    errors.ECODE_INVAL)
8339
8340     if not self.op.disks:
8341       if einfo.has_option(constants.INISECT_INS, "disk_count"):
8342         disks = []
8343         # TODO: import the disk iv_name too
8344         for idx in range(einfo.getint(constants.INISECT_INS, "disk_count")):
8345           disk_sz = einfo.getint(constants.INISECT_INS, "disk%d_size" % idx)
8346           disks.append({constants.IDISK_SIZE: disk_sz})
8347         self.op.disks = disks
8348       else:
8349         raise errors.OpPrereqError("No disk info specified and the export"
8350                                    " is missing the disk information",
8351                                    errors.ECODE_INVAL)
8352
8353     if (not self.op.nics and
8354         einfo.has_option(constants.INISECT_INS, "nic_count")):
8355       nics = []
8356       for idx in range(einfo.getint(constants.INISECT_INS, "nic_count")):
8357         ndict = {}
8358         for name in list(constants.NICS_PARAMETERS) + ["ip", "mac"]:
8359           v = einfo.get(constants.INISECT_INS, "nic%d_%s" % (idx, name))
8360           ndict[name] = v
8361         nics.append(ndict)
8362       self.op.nics = nics
8363
8364     if not self.op.tags and einfo.has_option(constants.INISECT_INS, "tags"):
8365       self.op.tags = einfo.get(constants.INISECT_INS, "tags").split()
8366
8367     if (self.op.hypervisor is None and
8368         einfo.has_option(constants.INISECT_INS, "hypervisor")):
8369       self.op.hypervisor = einfo.get(constants.INISECT_INS, "hypervisor")
8370
8371     if einfo.has_section(constants.INISECT_HYP):
8372       # use the export parameters but do not override the ones
8373       # specified by the user
8374       for name, value in einfo.items(constants.INISECT_HYP):
8375         if name not in self.op.hvparams:
8376           self.op.hvparams[name] = value
8377
8378     if einfo.has_section(constants.INISECT_BEP):
8379       # use the parameters, without overriding
8380       for name, value in einfo.items(constants.INISECT_BEP):
8381         if name not in self.op.beparams:
8382           self.op.beparams[name] = value
8383     else:
8384       # try to read the parameters old style, from the main section
8385       for name in constants.BES_PARAMETERS:
8386         if (name not in self.op.beparams and
8387             einfo.has_option(constants.INISECT_INS, name)):
8388           self.op.beparams[name] = einfo.get(constants.INISECT_INS, name)
8389
8390     if einfo.has_section(constants.INISECT_OSP):
8391       # use the parameters, without overriding
8392       for name, value in einfo.items(constants.INISECT_OSP):
8393         if name not in self.op.osparams:
8394           self.op.osparams[name] = value
8395
8396   def _RevertToDefaults(self, cluster):
8397     """Revert the instance parameters to the default values.
8398
8399     """
8400     # hvparams
8401     hv_defs = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type, {})
8402     for name in self.op.hvparams.keys():
8403       if name in hv_defs and hv_defs[name] == self.op.hvparams[name]:
8404         del self.op.hvparams[name]
8405     # beparams
8406     be_defs = cluster.SimpleFillBE({})
8407     for name in self.op.beparams.keys():
8408       if name in be_defs and be_defs[name] == self.op.beparams[name]:
8409         del self.op.beparams[name]
8410     # nic params
8411     nic_defs = cluster.SimpleFillNIC({})
8412     for nic in self.op.nics:
8413       for name in constants.NICS_PARAMETERS:
8414         if name in nic and name in nic_defs and nic[name] == nic_defs[name]:
8415           del nic[name]
8416     # osparams
8417     os_defs = cluster.SimpleFillOS(self.op.os_type, {})
8418     for name in self.op.osparams.keys():
8419       if name in os_defs and os_defs[name] == self.op.osparams[name]:
8420         del self.op.osparams[name]
8421
8422   def _CalculateFileStorageDir(self):
8423     """Calculate final instance file storage dir.
8424
8425     """
8426     # file storage dir calculation/check
8427     self.instance_file_storage_dir = None
8428     if self.op.disk_template in constants.DTS_FILEBASED:
8429       # build the full file storage dir path
8430       joinargs = []
8431
8432       if self.op.disk_template == constants.DT_SHARED_FILE:
8433         get_fsd_fn = self.cfg.GetSharedFileStorageDir
8434       else:
8435         get_fsd_fn = self.cfg.GetFileStorageDir
8436
8437       cfg_storagedir = get_fsd_fn()
8438       if not cfg_storagedir:
8439         raise errors.OpPrereqError("Cluster file storage dir not defined")
8440       joinargs.append(cfg_storagedir)
8441
8442       if self.op.file_storage_dir is not None:
8443         joinargs.append(self.op.file_storage_dir)
8444
8445       joinargs.append(self.op.instance_name)
8446
8447       # pylint: disable-msg=W0142
8448       self.instance_file_storage_dir = utils.PathJoin(*joinargs)
8449
8450   def CheckPrereq(self):
8451     """Check prerequisites.
8452
8453     """
8454     self._CalculateFileStorageDir()
8455
8456     if self.op.mode == constants.INSTANCE_IMPORT:
8457       export_info = self._ReadExportInfo()
8458       self._ReadExportParams(export_info)
8459
8460     if (not self.cfg.GetVGName() and
8461         self.op.disk_template not in constants.DTS_NOT_LVM):
8462       raise errors.OpPrereqError("Cluster does not support lvm-based"
8463                                  " instances", errors.ECODE_STATE)
8464
8465     if self.op.hypervisor is None:
8466       self.op.hypervisor = self.cfg.GetHypervisorType()
8467
8468     cluster = self.cfg.GetClusterInfo()
8469     enabled_hvs = cluster.enabled_hypervisors
8470     if self.op.hypervisor not in enabled_hvs:
8471       raise errors.OpPrereqError("Selected hypervisor (%s) not enabled in the"
8472                                  " cluster (%s)" % (self.op.hypervisor,
8473                                   ",".join(enabled_hvs)),
8474                                  errors.ECODE_STATE)
8475
8476     # Check tag validity
8477     for tag in self.op.tags:
8478       objects.TaggableObject.ValidateTag(tag)
8479
8480     # check hypervisor parameter syntax (locally)
8481     utils.ForceDictType(self.op.hvparams, constants.HVS_PARAMETER_TYPES)
8482     filled_hvp = cluster.SimpleFillHV(self.op.hypervisor, self.op.os_type,
8483                                       self.op.hvparams)
8484     hv_type = hypervisor.GetHypervisor(self.op.hypervisor)
8485     hv_type.CheckParameterSyntax(filled_hvp)
8486     self.hv_full = filled_hvp
8487     # check that we don't specify global parameters on an instance
8488     _CheckGlobalHvParams(self.op.hvparams)
8489
8490     # fill and remember the beparams dict
8491     utils.ForceDictType(self.op.beparams, constants.BES_PARAMETER_TYPES)
8492     self.be_full = cluster.SimpleFillBE(self.op.beparams)
8493
8494     # build os parameters
8495     self.os_full = cluster.SimpleFillOS(self.op.os_type, self.op.osparams)
8496
8497     # now that hvp/bep are in final format, let's reset to defaults,
8498     # if told to do so
8499     if self.op.identify_defaults:
8500       self._RevertToDefaults(cluster)
8501
8502     # NIC buildup
8503     self.nics = []
8504     for idx, nic in enumerate(self.op.nics):
8505       nic_mode_req = nic.get(constants.INIC_MODE, None)
8506       nic_mode = nic_mode_req
8507       if nic_mode is None:
8508         nic_mode = cluster.nicparams[constants.PP_DEFAULT][constants.NIC_MODE]
8509
8510       # in routed mode, for the first nic, the default ip is 'auto'
8511       if nic_mode == constants.NIC_MODE_ROUTED and idx == 0:
8512         default_ip_mode = constants.VALUE_AUTO
8513       else:
8514         default_ip_mode = constants.VALUE_NONE
8515
8516       # ip validity checks
8517       ip = nic.get(constants.INIC_IP, default_ip_mode)
8518       if ip is None or ip.lower() == constants.VALUE_NONE:
8519         nic_ip = None
8520       elif ip.lower() == constants.VALUE_AUTO:
8521         if not self.op.name_check:
8522           raise errors.OpPrereqError("IP address set to auto but name checks"
8523                                      " have been skipped",
8524                                      errors.ECODE_INVAL)
8525         nic_ip = self.hostname1.ip
8526       else:
8527         if not netutils.IPAddress.IsValid(ip):
8528           raise errors.OpPrereqError("Invalid IP address '%s'" % ip,
8529                                      errors.ECODE_INVAL)
8530         nic_ip = ip
8531
8532       # TODO: check the ip address for uniqueness
8533       if nic_mode == constants.NIC_MODE_ROUTED and not nic_ip:
8534         raise errors.OpPrereqError("Routed nic mode requires an ip address",
8535                                    errors.ECODE_INVAL)
8536
8537       # MAC address verification
8538       mac = nic.get(constants.INIC_MAC, constants.VALUE_AUTO)
8539       if mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8540         mac = utils.NormalizeAndValidateMac(mac)
8541
8542         try:
8543           self.cfg.ReserveMAC(mac, self.proc.GetECId())
8544         except errors.ReservationError:
8545           raise errors.OpPrereqError("MAC address %s already in use"
8546                                      " in cluster" % mac,
8547                                      errors.ECODE_NOTUNIQUE)
8548
8549       #  Build nic parameters
8550       link = nic.get(constants.INIC_LINK, None)
8551       nicparams = {}
8552       if nic_mode_req:
8553         nicparams[constants.NIC_MODE] = nic_mode_req
8554       if link:
8555         nicparams[constants.NIC_LINK] = link
8556
8557       check_params = cluster.SimpleFillNIC(nicparams)
8558       objects.NIC.CheckParameterSyntax(check_params)
8559       self.nics.append(objects.NIC(mac=mac, ip=nic_ip, nicparams=nicparams))
8560
8561     # disk checks/pre-build
8562     default_vg = self.cfg.GetVGName()
8563     self.disks = []
8564     for disk in self.op.disks:
8565       mode = disk.get(constants.IDISK_MODE, constants.DISK_RDWR)
8566       if mode not in constants.DISK_ACCESS_SET:
8567         raise errors.OpPrereqError("Invalid disk access mode '%s'" %
8568                                    mode, errors.ECODE_INVAL)
8569       size = disk.get(constants.IDISK_SIZE, None)
8570       if size is None:
8571         raise errors.OpPrereqError("Missing disk size", errors.ECODE_INVAL)
8572       try:
8573         size = int(size)
8574       except (TypeError, ValueError):
8575         raise errors.OpPrereqError("Invalid disk size '%s'" % size,
8576                                    errors.ECODE_INVAL)
8577
8578       data_vg = disk.get(constants.IDISK_VG, default_vg)
8579       new_disk = {
8580         constants.IDISK_SIZE: size,
8581         constants.IDISK_MODE: mode,
8582         constants.IDISK_VG: data_vg,
8583         constants.IDISK_METAVG: disk.get(constants.IDISK_METAVG, data_vg),
8584         }
8585       if constants.IDISK_ADOPT in disk:
8586         new_disk[constants.IDISK_ADOPT] = disk[constants.IDISK_ADOPT]
8587       self.disks.append(new_disk)
8588
8589     if self.op.mode == constants.INSTANCE_IMPORT:
8590
8591       # Check that the new instance doesn't have less disks than the export
8592       instance_disks = len(self.disks)
8593       export_disks = export_info.getint(constants.INISECT_INS, 'disk_count')
8594       if instance_disks < export_disks:
8595         raise errors.OpPrereqError("Not enough disks to import."
8596                                    " (instance: %d, export: %d)" %
8597                                    (instance_disks, export_disks),
8598                                    errors.ECODE_INVAL)
8599
8600       disk_images = []
8601       for idx in range(export_disks):
8602         option = "disk%d_dump" % idx
8603         if export_info.has_option(constants.INISECT_INS, option):
8604           # FIXME: are the old os-es, disk sizes, etc. useful?
8605           export_name = export_info.get(constants.INISECT_INS, option)
8606           image = utils.PathJoin(self.op.src_path, export_name)
8607           disk_images.append(image)
8608         else:
8609           disk_images.append(False)
8610
8611       self.src_images = disk_images
8612
8613       old_name = export_info.get(constants.INISECT_INS, "name")
8614       try:
8615         exp_nic_count = export_info.getint(constants.INISECT_INS, "nic_count")
8616       except (TypeError, ValueError), err:
8617         raise errors.OpPrereqError("Invalid export file, nic_count is not"
8618                                    " an integer: %s" % str(err),
8619                                    errors.ECODE_STATE)
8620       if self.op.instance_name == old_name:
8621         for idx, nic in enumerate(self.nics):
8622           if nic.mac == constants.VALUE_AUTO and exp_nic_count >= idx:
8623             nic_mac_ini = "nic%d_mac" % idx
8624             nic.mac = export_info.get(constants.INISECT_INS, nic_mac_ini)
8625
8626     # ENDIF: self.op.mode == constants.INSTANCE_IMPORT
8627
8628     # ip ping checks (we use the same ip that was resolved in ExpandNames)
8629     if self.op.ip_check:
8630       if netutils.TcpPing(self.check_ip, constants.DEFAULT_NODED_PORT):
8631         raise errors.OpPrereqError("IP %s of instance %s already in use" %
8632                                    (self.check_ip, self.op.instance_name),
8633                                    errors.ECODE_NOTUNIQUE)
8634
8635     #### mac address generation
8636     # By generating here the mac address both the allocator and the hooks get
8637     # the real final mac address rather than the 'auto' or 'generate' value.
8638     # There is a race condition between the generation and the instance object
8639     # creation, which means that we know the mac is valid now, but we're not
8640     # sure it will be when we actually add the instance. If things go bad
8641     # adding the instance will abort because of a duplicate mac, and the
8642     # creation job will fail.
8643     for nic in self.nics:
8644       if nic.mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
8645         nic.mac = self.cfg.GenerateMAC(self.proc.GetECId())
8646
8647     #### allocator run
8648
8649     if self.op.iallocator is not None:
8650       self._RunAllocator()
8651
8652     #### node related checks
8653
8654     # check primary node
8655     self.pnode = pnode = self.cfg.GetNodeInfo(self.op.pnode)
8656     assert self.pnode is not None, \
8657       "Cannot retrieve locked node %s" % self.op.pnode
8658     if pnode.offline:
8659       raise errors.OpPrereqError("Cannot use offline primary node '%s'" %
8660                                  pnode.name, errors.ECODE_STATE)
8661     if pnode.drained:
8662       raise errors.OpPrereqError("Cannot use drained primary node '%s'" %
8663                                  pnode.name, errors.ECODE_STATE)
8664     if not pnode.vm_capable:
8665       raise errors.OpPrereqError("Cannot use non-vm_capable primary node"
8666                                  " '%s'" % pnode.name, errors.ECODE_STATE)
8667
8668     self.secondaries = []
8669
8670     # mirror node verification
8671     if self.op.disk_template in constants.DTS_INT_MIRROR:
8672       if self.op.snode == pnode.name:
8673         raise errors.OpPrereqError("The secondary node cannot be the"
8674                                    " primary node", errors.ECODE_INVAL)
8675       _CheckNodeOnline(self, self.op.snode)
8676       _CheckNodeNotDrained(self, self.op.snode)
8677       _CheckNodeVmCapable(self, self.op.snode)
8678       self.secondaries.append(self.op.snode)
8679
8680     nodenames = [pnode.name] + self.secondaries
8681
8682     if not self.adopt_disks:
8683       # Check lv size requirements, if not adopting
8684       req_sizes = _ComputeDiskSizePerVG(self.op.disk_template, self.disks)
8685       _CheckNodesFreeDiskPerVG(self, nodenames, req_sizes)
8686
8687     elif self.op.disk_template == constants.DT_PLAIN: # Check the adoption data
8688       all_lvs = set(["%s/%s" % (disk[constants.IDISK_VG],
8689                                 disk[constants.IDISK_ADOPT])
8690                      for disk in self.disks])
8691       if len(all_lvs) != len(self.disks):
8692         raise errors.OpPrereqError("Duplicate volume names given for adoption",
8693                                    errors.ECODE_INVAL)
8694       for lv_name in all_lvs:
8695         try:
8696           # FIXME: lv_name here is "vg/lv" need to ensure that other calls
8697           # to ReserveLV uses the same syntax
8698           self.cfg.ReserveLV(lv_name, self.proc.GetECId())
8699         except errors.ReservationError:
8700           raise errors.OpPrereqError("LV named %s used by another instance" %
8701                                      lv_name, errors.ECODE_NOTUNIQUE)
8702
8703       vg_names = self.rpc.call_vg_list([pnode.name])[pnode.name]
8704       vg_names.Raise("Cannot get VG information from node %s" % pnode.name)
8705
8706       node_lvs = self.rpc.call_lv_list([pnode.name],
8707                                        vg_names.payload.keys())[pnode.name]
8708       node_lvs.Raise("Cannot get LV information from node %s" % pnode.name)
8709       node_lvs = node_lvs.payload
8710
8711       delta = all_lvs.difference(node_lvs.keys())
8712       if delta:
8713         raise errors.OpPrereqError("Missing logical volume(s): %s" %
8714                                    utils.CommaJoin(delta),
8715                                    errors.ECODE_INVAL)
8716       online_lvs = [lv for lv in all_lvs if node_lvs[lv][2]]
8717       if online_lvs:
8718         raise errors.OpPrereqError("Online logical volumes found, cannot"
8719                                    " adopt: %s" % utils.CommaJoin(online_lvs),
8720                                    errors.ECODE_STATE)
8721       # update the size of disk based on what is found
8722       for dsk in self.disks:
8723         dsk[constants.IDISK_SIZE] = \
8724           int(float(node_lvs["%s/%s" % (dsk[constants.IDISK_VG],
8725                                         dsk[constants.IDISK_ADOPT])][0]))
8726
8727     elif self.op.disk_template == constants.DT_BLOCK:
8728       # Normalize and de-duplicate device paths
8729       all_disks = set([os.path.abspath(disk[constants.IDISK_ADOPT])
8730                        for disk in self.disks])
8731       if len(all_disks) != len(self.disks):
8732         raise errors.OpPrereqError("Duplicate disk names given for adoption",
8733                                    errors.ECODE_INVAL)
8734       baddisks = [d for d in all_disks
8735                   if not d.startswith(constants.ADOPTABLE_BLOCKDEV_ROOT)]
8736       if baddisks:
8737         raise errors.OpPrereqError("Device node(s) %s lie outside %s and"
8738                                    " cannot be adopted" %
8739                                    (", ".join(baddisks),
8740                                     constants.ADOPTABLE_BLOCKDEV_ROOT),
8741                                    errors.ECODE_INVAL)
8742
8743       node_disks = self.rpc.call_bdev_sizes([pnode.name],
8744                                             list(all_disks))[pnode.name]
8745       node_disks.Raise("Cannot get block device information from node %s" %
8746                        pnode.name)
8747       node_disks = node_disks.payload
8748       delta = all_disks.difference(node_disks.keys())
8749       if delta:
8750         raise errors.OpPrereqError("Missing block device(s): %s" %
8751                                    utils.CommaJoin(delta),
8752                                    errors.ECODE_INVAL)
8753       for dsk in self.disks:
8754         dsk[constants.IDISK_SIZE] = \
8755           int(float(node_disks[dsk[constants.IDISK_ADOPT]]))
8756
8757     _CheckHVParams(self, nodenames, self.op.hypervisor, self.op.hvparams)
8758
8759     _CheckNodeHasOS(self, pnode.name, self.op.os_type, self.op.force_variant)
8760     # check OS parameters (remotely)
8761     _CheckOSParams(self, True, nodenames, self.op.os_type, self.os_full)
8762
8763     _CheckNicsBridgesExist(self, self.nics, self.pnode.name)
8764
8765     # memory check on primary node
8766     if self.op.start:
8767       _CheckNodeFreeMemory(self, self.pnode.name,
8768                            "creating instance %s" % self.op.instance_name,
8769                            self.be_full[constants.BE_MEMORY],
8770                            self.op.hypervisor)
8771
8772     self.dry_run_result = list(nodenames)
8773
8774   def Exec(self, feedback_fn):
8775     """Create and add the instance to the cluster.
8776
8777     """
8778     instance = self.op.instance_name
8779     pnode_name = self.pnode.name
8780
8781     ht_kind = self.op.hypervisor
8782     if ht_kind in constants.HTS_REQ_PORT:
8783       network_port = self.cfg.AllocatePort()
8784     else:
8785       network_port = None
8786
8787     disks = _GenerateDiskTemplate(self,
8788                                   self.op.disk_template,
8789                                   instance, pnode_name,
8790                                   self.secondaries,
8791                                   self.disks,
8792                                   self.instance_file_storage_dir,
8793                                   self.op.file_driver,
8794                                   0,
8795                                   feedback_fn)
8796
8797     iobj = objects.Instance(name=instance, os=self.op.os_type,
8798                             primary_node=pnode_name,
8799                             nics=self.nics, disks=disks,
8800                             disk_template=self.op.disk_template,
8801                             admin_up=False,
8802                             network_port=network_port,
8803                             beparams=self.op.beparams,
8804                             hvparams=self.op.hvparams,
8805                             hypervisor=self.op.hypervisor,
8806                             osparams=self.op.osparams,
8807                             )
8808
8809     if self.op.tags:
8810       for tag in self.op.tags:
8811         iobj.AddTag(tag)
8812
8813     if self.adopt_disks:
8814       if self.op.disk_template == constants.DT_PLAIN:
8815         # rename LVs to the newly-generated names; we need to construct
8816         # 'fake' LV disks with the old data, plus the new unique_id
8817         tmp_disks = [objects.Disk.FromDict(v.ToDict()) for v in disks]
8818         rename_to = []
8819         for t_dsk, a_dsk in zip (tmp_disks, self.disks):
8820           rename_to.append(t_dsk.logical_id)
8821           t_dsk.logical_id = (t_dsk.logical_id[0], a_dsk[constants.IDISK_ADOPT])
8822           self.cfg.SetDiskID(t_dsk, pnode_name)
8823         result = self.rpc.call_blockdev_rename(pnode_name,
8824                                                zip(tmp_disks, rename_to))
8825         result.Raise("Failed to rename adoped LVs")
8826     else:
8827       feedback_fn("* creating instance disks...")
8828       try:
8829         _CreateDisks(self, iobj)
8830       except errors.OpExecError:
8831         self.LogWarning("Device creation failed, reverting...")
8832         try:
8833           _RemoveDisks(self, iobj)
8834         finally:
8835           self.cfg.ReleaseDRBDMinors(instance)
8836           raise
8837
8838     feedback_fn("adding instance %s to cluster config" % instance)
8839
8840     self.cfg.AddInstance(iobj, self.proc.GetECId())
8841
8842     # Declare that we don't want to remove the instance lock anymore, as we've
8843     # added the instance to the config
8844     del self.remove_locks[locking.LEVEL_INSTANCE]
8845
8846     if self.op.mode == constants.INSTANCE_IMPORT:
8847       # Release unused nodes
8848       _ReleaseLocks(self, locking.LEVEL_NODE, keep=[self.op.src_node])
8849     else:
8850       # Release all nodes
8851       _ReleaseLocks(self, locking.LEVEL_NODE)
8852
8853     disk_abort = False
8854     if not self.adopt_disks and self.cfg.GetClusterInfo().prealloc_wipe_disks:
8855       feedback_fn("* wiping instance disks...")
8856       try:
8857         _WipeDisks(self, iobj)
8858       except errors.OpExecError, err:
8859         logging.exception("Wiping disks failed")
8860         self.LogWarning("Wiping instance disks failed (%s)", err)
8861         disk_abort = True
8862
8863     if disk_abort:
8864       # Something is already wrong with the disks, don't do anything else
8865       pass
8866     elif self.op.wait_for_sync:
8867       disk_abort = not _WaitForSync(self, iobj)
8868     elif iobj.disk_template in constants.DTS_INT_MIRROR:
8869       # make sure the disks are not degraded (still sync-ing is ok)
8870       time.sleep(15)
8871       feedback_fn("* checking mirrors status")
8872       disk_abort = not _WaitForSync(self, iobj, oneshot=True)
8873     else:
8874       disk_abort = False
8875
8876     if disk_abort:
8877       _RemoveDisks(self, iobj)
8878       self.cfg.RemoveInstance(iobj.name)
8879       # Make sure the instance lock gets removed
8880       self.remove_locks[locking.LEVEL_INSTANCE] = iobj.name
8881       raise errors.OpExecError("There are some degraded disks for"
8882                                " this instance")
8883
8884     if iobj.disk_template != constants.DT_DISKLESS and not self.adopt_disks:
8885       if self.op.mode == constants.INSTANCE_CREATE:
8886         if not self.op.no_install:
8887           pause_sync = (iobj.disk_template in constants.DTS_INT_MIRROR and
8888                         not self.op.wait_for_sync)
8889           if pause_sync:
8890             feedback_fn("* pausing disk sync to install instance OS")
8891             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
8892                                                               iobj.disks, True)
8893             for idx, success in enumerate(result.payload):
8894               if not success:
8895                 logging.warn("pause-sync of instance %s for disk %d failed",
8896                              instance, idx)
8897
8898           feedback_fn("* running the instance OS create scripts...")
8899           # FIXME: pass debug option from opcode to backend
8900           result = self.rpc.call_instance_os_add(pnode_name, iobj, False,
8901                                                  self.op.debug_level)
8902           if pause_sync:
8903             feedback_fn("* resuming disk sync")
8904             result = self.rpc.call_blockdev_pause_resume_sync(pnode_name,
8905                                                               iobj.disks, False)
8906             for idx, success in enumerate(result.payload):
8907               if not success:
8908                 logging.warn("resume-sync of instance %s for disk %d failed",
8909                              instance, idx)
8910
8911           result.Raise("Could not add os for instance %s"
8912                        " on node %s" % (instance, pnode_name))
8913
8914       elif self.op.mode == constants.INSTANCE_IMPORT:
8915         feedback_fn("* running the instance OS import scripts...")
8916
8917         transfers = []
8918
8919         for idx, image in enumerate(self.src_images):
8920           if not image:
8921             continue
8922
8923           # FIXME: pass debug option from opcode to backend
8924           dt = masterd.instance.DiskTransfer("disk/%s" % idx,
8925                                              constants.IEIO_FILE, (image, ),
8926                                              constants.IEIO_SCRIPT,
8927                                              (iobj.disks[idx], idx),
8928                                              None)
8929           transfers.append(dt)
8930
8931         import_result = \
8932           masterd.instance.TransferInstanceData(self, feedback_fn,
8933                                                 self.op.src_node, pnode_name,
8934                                                 self.pnode.secondary_ip,
8935                                                 iobj, transfers)
8936         if not compat.all(import_result):
8937           self.LogWarning("Some disks for instance %s on node %s were not"
8938                           " imported successfully" % (instance, pnode_name))
8939
8940       elif self.op.mode == constants.INSTANCE_REMOTE_IMPORT:
8941         feedback_fn("* preparing remote import...")
8942         # The source cluster will stop the instance before attempting to make a
8943         # connection. In some cases stopping an instance can take a long time,
8944         # hence the shutdown timeout is added to the connection timeout.
8945         connect_timeout = (constants.RIE_CONNECT_TIMEOUT +
8946                            self.op.source_shutdown_timeout)
8947         timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
8948
8949         assert iobj.primary_node == self.pnode.name
8950         disk_results = \
8951           masterd.instance.RemoteImport(self, feedback_fn, iobj, self.pnode,
8952                                         self.source_x509_ca,
8953                                         self._cds, timeouts)
8954         if not compat.all(disk_results):
8955           # TODO: Should the instance still be started, even if some disks
8956           # failed to import (valid for local imports, too)?
8957           self.LogWarning("Some disks for instance %s on node %s were not"
8958                           " imported successfully" % (instance, pnode_name))
8959
8960         # Run rename script on newly imported instance
8961         assert iobj.name == instance
8962         feedback_fn("Running rename script for %s" % instance)
8963         result = self.rpc.call_instance_run_rename(pnode_name, iobj,
8964                                                    self.source_instance_name,
8965                                                    self.op.debug_level)
8966         if result.fail_msg:
8967           self.LogWarning("Failed to run rename script for %s on node"
8968                           " %s: %s" % (instance, pnode_name, result.fail_msg))
8969
8970       else:
8971         # also checked in the prereq part
8972         raise errors.ProgrammerError("Unknown OS initialization mode '%s'"
8973                                      % self.op.mode)
8974
8975     if self.op.start:
8976       iobj.admin_up = True
8977       self.cfg.Update(iobj, feedback_fn)
8978       logging.info("Starting instance %s on node %s", instance, pnode_name)
8979       feedback_fn("* starting instance...")
8980       result = self.rpc.call_instance_start(pnode_name, iobj,
8981                                             None, None, False)
8982       result.Raise("Could not start instance")
8983
8984     return list(iobj.all_nodes)
8985
8986
8987 class LUInstanceConsole(NoHooksLU):
8988   """Connect to an instance's console.
8989
8990   This is somewhat special in that it returns the command line that
8991   you need to run on the master node in order to connect to the
8992   console.
8993
8994   """
8995   REQ_BGL = False
8996
8997   def ExpandNames(self):
8998     self._ExpandAndLockInstance()
8999
9000   def CheckPrereq(self):
9001     """Check prerequisites.
9002
9003     This checks that the instance is in the cluster.
9004
9005     """
9006     self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
9007     assert self.instance is not None, \
9008       "Cannot retrieve locked instance %s" % self.op.instance_name
9009     _CheckNodeOnline(self, self.instance.primary_node)
9010
9011   def Exec(self, feedback_fn):
9012     """Connect to the console of an instance
9013
9014     """
9015     instance = self.instance
9016     node = instance.primary_node
9017
9018     node_insts = self.rpc.call_instance_list([node],
9019                                              [instance.hypervisor])[node]
9020     node_insts.Raise("Can't get node information from %s" % node)
9021
9022     if instance.name not in node_insts.payload:
9023       if instance.admin_up:
9024         state = constants.INSTST_ERRORDOWN
9025       else:
9026         state = constants.INSTST_ADMINDOWN
9027       raise errors.OpExecError("Instance %s is not running (state %s)" %
9028                                (instance.name, state))
9029
9030     logging.debug("Connecting to console of %s on %s", instance.name, node)
9031
9032     return _GetInstanceConsole(self.cfg.GetClusterInfo(), instance)
9033
9034
9035 def _GetInstanceConsole(cluster, instance):
9036   """Returns console information for an instance.
9037
9038   @type cluster: L{objects.Cluster}
9039   @type instance: L{objects.Instance}
9040   @rtype: dict
9041
9042   """
9043   hyper = hypervisor.GetHypervisor(instance.hypervisor)
9044   # beparams and hvparams are passed separately, to avoid editing the
9045   # instance and then saving the defaults in the instance itself.
9046   hvparams = cluster.FillHV(instance)
9047   beparams = cluster.FillBE(instance)
9048   console = hyper.GetInstanceConsole(instance, hvparams, beparams)
9049
9050   assert console.instance == instance.name
9051   assert console.Validate()
9052
9053   return console.ToDict()
9054
9055
9056 class LUInstanceReplaceDisks(LogicalUnit):
9057   """Replace the disks of an instance.
9058
9059   """
9060   HPATH = "mirrors-replace"
9061   HTYPE = constants.HTYPE_INSTANCE
9062   REQ_BGL = False
9063
9064   def CheckArguments(self):
9065     TLReplaceDisks.CheckArguments(self.op.mode, self.op.remote_node,
9066                                   self.op.iallocator)
9067
9068   def ExpandNames(self):
9069     self._ExpandAndLockInstance()
9070
9071     assert locking.LEVEL_NODE not in self.needed_locks
9072     assert locking.LEVEL_NODEGROUP not in self.needed_locks
9073
9074     assert self.op.iallocator is None or self.op.remote_node is None, \
9075       "Conflicting options"
9076
9077     if self.op.remote_node is not None:
9078       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9079
9080       # Warning: do not remove the locking of the new secondary here
9081       # unless DRBD8.AddChildren is changed to work in parallel;
9082       # currently it doesn't since parallel invocations of
9083       # FindUnusedMinor will conflict
9084       self.needed_locks[locking.LEVEL_NODE] = [self.op.remote_node]
9085       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
9086     else:
9087       self.needed_locks[locking.LEVEL_NODE] = []
9088       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
9089
9090       if self.op.iallocator is not None:
9091         # iallocator will select a new node in the same group
9092         self.needed_locks[locking.LEVEL_NODEGROUP] = []
9093
9094     self.replacer = TLReplaceDisks(self, self.op.instance_name, self.op.mode,
9095                                    self.op.iallocator, self.op.remote_node,
9096                                    self.op.disks, False, self.op.early_release)
9097
9098     self.tasklets = [self.replacer]
9099
9100   def DeclareLocks(self, level):
9101     if level == locking.LEVEL_NODEGROUP:
9102       assert self.op.remote_node is None
9103       assert self.op.iallocator is not None
9104       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
9105
9106       self.share_locks[locking.LEVEL_NODEGROUP] = 1
9107       self.needed_locks[locking.LEVEL_NODEGROUP] = \
9108         self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9109
9110     elif level == locking.LEVEL_NODE:
9111       if self.op.iallocator is not None:
9112         assert self.op.remote_node is None
9113         assert not self.needed_locks[locking.LEVEL_NODE]
9114
9115         # Lock member nodes of all locked groups
9116         self.needed_locks[locking.LEVEL_NODE] = [node_name
9117           for group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
9118           for node_name in self.cfg.GetNodeGroup(group_uuid).members]
9119       else:
9120         self._LockInstancesNodes()
9121
9122   def BuildHooksEnv(self):
9123     """Build hooks env.
9124
9125     This runs on the master, the primary and all the secondaries.
9126
9127     """
9128     instance = self.replacer.instance
9129     env = {
9130       "MODE": self.op.mode,
9131       "NEW_SECONDARY": self.op.remote_node,
9132       "OLD_SECONDARY": instance.secondary_nodes[0],
9133       }
9134     env.update(_BuildInstanceHookEnvByObject(self, instance))
9135     return env
9136
9137   def BuildHooksNodes(self):
9138     """Build hooks nodes.
9139
9140     """
9141     instance = self.replacer.instance
9142     nl = [
9143       self.cfg.GetMasterNode(),
9144       instance.primary_node,
9145       ]
9146     if self.op.remote_node is not None:
9147       nl.append(self.op.remote_node)
9148     return nl, nl
9149
9150   def CheckPrereq(self):
9151     """Check prerequisites.
9152
9153     """
9154     assert (self.glm.is_owned(locking.LEVEL_NODEGROUP) or
9155             self.op.iallocator is None)
9156
9157     owned_groups = self.glm.list_owned(locking.LEVEL_NODEGROUP)
9158     if owned_groups:
9159       groups = self.cfg.GetInstanceNodeGroups(self.op.instance_name)
9160       if owned_groups != groups:
9161         raise errors.OpExecError("Node groups used by instance '%s' changed"
9162                                  " since lock was acquired, current list is %r,"
9163                                  " used to be '%s'" %
9164                                  (self.op.instance_name,
9165                                   utils.CommaJoin(groups),
9166                                   utils.CommaJoin(owned_groups)))
9167
9168     return LogicalUnit.CheckPrereq(self)
9169
9170
9171 class TLReplaceDisks(Tasklet):
9172   """Replaces disks for an instance.
9173
9174   Note: Locking is not within the scope of this class.
9175
9176   """
9177   def __init__(self, lu, instance_name, mode, iallocator_name, remote_node,
9178                disks, delay_iallocator, early_release):
9179     """Initializes this class.
9180
9181     """
9182     Tasklet.__init__(self, lu)
9183
9184     # Parameters
9185     self.instance_name = instance_name
9186     self.mode = mode
9187     self.iallocator_name = iallocator_name
9188     self.remote_node = remote_node
9189     self.disks = disks
9190     self.delay_iallocator = delay_iallocator
9191     self.early_release = early_release
9192
9193     # Runtime data
9194     self.instance = None
9195     self.new_node = None
9196     self.target_node = None
9197     self.other_node = None
9198     self.remote_node_info = None
9199     self.node_secondary_ip = None
9200
9201   @staticmethod
9202   def CheckArguments(mode, remote_node, iallocator):
9203     """Helper function for users of this class.
9204
9205     """
9206     # check for valid parameter combination
9207     if mode == constants.REPLACE_DISK_CHG:
9208       if remote_node is None and iallocator is None:
9209         raise errors.OpPrereqError("When changing the secondary either an"
9210                                    " iallocator script must be used or the"
9211                                    " new node given", errors.ECODE_INVAL)
9212
9213       if remote_node is not None and iallocator is not None:
9214         raise errors.OpPrereqError("Give either the iallocator or the new"
9215                                    " secondary, not both", errors.ECODE_INVAL)
9216
9217     elif remote_node is not None or iallocator is not None:
9218       # Not replacing the secondary
9219       raise errors.OpPrereqError("The iallocator and new node options can"
9220                                  " only be used when changing the"
9221                                  " secondary node", errors.ECODE_INVAL)
9222
9223   @staticmethod
9224   def _RunAllocator(lu, iallocator_name, instance_name, relocate_from):
9225     """Compute a new secondary node using an IAllocator.
9226
9227     """
9228     ial = IAllocator(lu.cfg, lu.rpc,
9229                      mode=constants.IALLOCATOR_MODE_RELOC,
9230                      name=instance_name,
9231                      relocate_from=list(relocate_from))
9232
9233     ial.Run(iallocator_name)
9234
9235     if not ial.success:
9236       raise errors.OpPrereqError("Can't compute nodes using iallocator '%s':"
9237                                  " %s" % (iallocator_name, ial.info),
9238                                  errors.ECODE_NORES)
9239
9240     if len(ial.result) != ial.required_nodes:
9241       raise errors.OpPrereqError("iallocator '%s' returned invalid number"
9242                                  " of nodes (%s), required %s" %
9243                                  (iallocator_name,
9244                                   len(ial.result), ial.required_nodes),
9245                                  errors.ECODE_FAULT)
9246
9247     remote_node_name = ial.result[0]
9248
9249     lu.LogInfo("Selected new secondary for instance '%s': %s",
9250                instance_name, remote_node_name)
9251
9252     return remote_node_name
9253
9254   def _FindFaultyDisks(self, node_name):
9255     return _FindFaultyInstanceDisks(self.cfg, self.rpc, self.instance,
9256                                     node_name, True)
9257
9258   def _CheckDisksActivated(self, instance):
9259     """Checks if the instance disks are activated.
9260
9261     @param instance: The instance to check disks
9262     @return: True if they are activated, False otherwise
9263
9264     """
9265     nodes = instance.all_nodes
9266
9267     for idx, dev in enumerate(instance.disks):
9268       for node in nodes:
9269         self.lu.LogInfo("Checking disk/%d on %s", idx, node)
9270         self.cfg.SetDiskID(dev, node)
9271
9272         result = self.rpc.call_blockdev_find(node, dev)
9273
9274         if result.offline:
9275           continue
9276         elif result.fail_msg or not result.payload:
9277           return False
9278
9279     return True
9280
9281   def CheckPrereq(self):
9282     """Check prerequisites.
9283
9284     This checks that the instance is in the cluster.
9285
9286     """
9287     self.instance = instance = self.cfg.GetInstanceInfo(self.instance_name)
9288     assert instance is not None, \
9289       "Cannot retrieve locked instance %s" % self.instance_name
9290
9291     if instance.disk_template != constants.DT_DRBD8:
9292       raise errors.OpPrereqError("Can only run replace disks for DRBD8-based"
9293                                  " instances", errors.ECODE_INVAL)
9294
9295     if len(instance.secondary_nodes) != 1:
9296       raise errors.OpPrereqError("The instance has a strange layout,"
9297                                  " expected one secondary but found %d" %
9298                                  len(instance.secondary_nodes),
9299                                  errors.ECODE_FAULT)
9300
9301     if not self.delay_iallocator:
9302       self._CheckPrereq2()
9303
9304   def _CheckPrereq2(self):
9305     """Check prerequisites, second part.
9306
9307     This function should always be part of CheckPrereq. It was separated and is
9308     now called from Exec because during node evacuation iallocator was only
9309     called with an unmodified cluster model, not taking planned changes into
9310     account.
9311
9312     """
9313     instance = self.instance
9314     secondary_node = instance.secondary_nodes[0]
9315
9316     if self.iallocator_name is None:
9317       remote_node = self.remote_node
9318     else:
9319       remote_node = self._RunAllocator(self.lu, self.iallocator_name,
9320                                        instance.name, instance.secondary_nodes)
9321
9322     if remote_node is None:
9323       self.remote_node_info = None
9324     else:
9325       assert remote_node in self.lu.glm.list_owned(locking.LEVEL_NODE), \
9326              "Remote node '%s' is not locked" % remote_node
9327
9328       self.remote_node_info = self.cfg.GetNodeInfo(remote_node)
9329       assert self.remote_node_info is not None, \
9330         "Cannot retrieve locked node %s" % remote_node
9331
9332     if remote_node == self.instance.primary_node:
9333       raise errors.OpPrereqError("The specified node is the primary node of"
9334                                  " the instance", errors.ECODE_INVAL)
9335
9336     if remote_node == secondary_node:
9337       raise errors.OpPrereqError("The specified node is already the"
9338                                  " secondary node of the instance",
9339                                  errors.ECODE_INVAL)
9340
9341     if self.disks and self.mode in (constants.REPLACE_DISK_AUTO,
9342                                     constants.REPLACE_DISK_CHG):
9343       raise errors.OpPrereqError("Cannot specify disks to be replaced",
9344                                  errors.ECODE_INVAL)
9345
9346     if self.mode == constants.REPLACE_DISK_AUTO:
9347       if not self._CheckDisksActivated(instance):
9348         raise errors.OpPrereqError("Please run activate-disks on instance %s"
9349                                    " first" % self.instance_name,
9350                                    errors.ECODE_STATE)
9351       faulty_primary = self._FindFaultyDisks(instance.primary_node)
9352       faulty_secondary = self._FindFaultyDisks(secondary_node)
9353
9354       if faulty_primary and faulty_secondary:
9355         raise errors.OpPrereqError("Instance %s has faulty disks on more than"
9356                                    " one node and can not be repaired"
9357                                    " automatically" % self.instance_name,
9358                                    errors.ECODE_STATE)
9359
9360       if faulty_primary:
9361         self.disks = faulty_primary
9362         self.target_node = instance.primary_node
9363         self.other_node = secondary_node
9364         check_nodes = [self.target_node, self.other_node]
9365       elif faulty_secondary:
9366         self.disks = faulty_secondary
9367         self.target_node = secondary_node
9368         self.other_node = instance.primary_node
9369         check_nodes = [self.target_node, self.other_node]
9370       else:
9371         self.disks = []
9372         check_nodes = []
9373
9374     else:
9375       # Non-automatic modes
9376       if self.mode == constants.REPLACE_DISK_PRI:
9377         self.target_node = instance.primary_node
9378         self.other_node = secondary_node
9379         check_nodes = [self.target_node, self.other_node]
9380
9381       elif self.mode == constants.REPLACE_DISK_SEC:
9382         self.target_node = secondary_node
9383         self.other_node = instance.primary_node
9384         check_nodes = [self.target_node, self.other_node]
9385
9386       elif self.mode == constants.REPLACE_DISK_CHG:
9387         self.new_node = remote_node
9388         self.other_node = instance.primary_node
9389         self.target_node = secondary_node
9390         check_nodes = [self.new_node, self.other_node]
9391
9392         _CheckNodeNotDrained(self.lu, remote_node)
9393         _CheckNodeVmCapable(self.lu, remote_node)
9394
9395         old_node_info = self.cfg.GetNodeInfo(secondary_node)
9396         assert old_node_info is not None
9397         if old_node_info.offline and not self.early_release:
9398           # doesn't make sense to delay the release
9399           self.early_release = True
9400           self.lu.LogInfo("Old secondary %s is offline, automatically enabling"
9401                           " early-release mode", secondary_node)
9402
9403       else:
9404         raise errors.ProgrammerError("Unhandled disk replace mode (%s)" %
9405                                      self.mode)
9406
9407       # If not specified all disks should be replaced
9408       if not self.disks:
9409         self.disks = range(len(self.instance.disks))
9410
9411     for node in check_nodes:
9412       _CheckNodeOnline(self.lu, node)
9413
9414     touched_nodes = frozenset(node_name for node_name in [self.new_node,
9415                                                           self.other_node,
9416                                                           self.target_node]
9417                               if node_name is not None)
9418
9419     # Release unneeded node locks
9420     _ReleaseLocks(self.lu, locking.LEVEL_NODE, keep=touched_nodes)
9421
9422     # Release any owned node group
9423     if self.lu.glm.is_owned(locking.LEVEL_NODEGROUP):
9424       _ReleaseLocks(self.lu, locking.LEVEL_NODEGROUP)
9425
9426     # Check whether disks are valid
9427     for disk_idx in self.disks:
9428       instance.FindDisk(disk_idx)
9429
9430     # Get secondary node IP addresses
9431     self.node_secondary_ip = dict((name, node.secondary_ip) for (name, node)
9432                                   in self.cfg.GetMultiNodeInfo(touched_nodes))
9433
9434   def Exec(self, feedback_fn):
9435     """Execute disk replacement.
9436
9437     This dispatches the disk replacement to the appropriate handler.
9438
9439     """
9440     if self.delay_iallocator:
9441       self._CheckPrereq2()
9442
9443     if __debug__:
9444       # Verify owned locks before starting operation
9445       owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
9446       assert set(owned_locks) == set(self.node_secondary_ip), \
9447           ("Incorrect node locks, owning %s, expected %s" %
9448            (owned_locks, self.node_secondary_ip.keys()))
9449
9450       owned_locks = self.lu.glm.list_owned(locking.LEVEL_INSTANCE)
9451       assert list(owned_locks) == [self.instance_name], \
9452           "Instance '%s' not locked" % self.instance_name
9453
9454       assert not self.lu.glm.is_owned(locking.LEVEL_NODEGROUP), \
9455           "Should not own any node group lock at this point"
9456
9457     if not self.disks:
9458       feedback_fn("No disks need replacement")
9459       return
9460
9461     feedback_fn("Replacing disk(s) %s for %s" %
9462                 (utils.CommaJoin(self.disks), self.instance.name))
9463
9464     activate_disks = (not self.instance.admin_up)
9465
9466     # Activate the instance disks if we're replacing them on a down instance
9467     if activate_disks:
9468       _StartInstanceDisks(self.lu, self.instance, True)
9469
9470     try:
9471       # Should we replace the secondary node?
9472       if self.new_node is not None:
9473         fn = self._ExecDrbd8Secondary
9474       else:
9475         fn = self._ExecDrbd8DiskOnly
9476
9477       result = fn(feedback_fn)
9478     finally:
9479       # Deactivate the instance disks if we're replacing them on a
9480       # down instance
9481       if activate_disks:
9482         _SafeShutdownInstanceDisks(self.lu, self.instance)
9483
9484     if __debug__:
9485       # Verify owned locks
9486       owned_locks = self.lu.glm.list_owned(locking.LEVEL_NODE)
9487       nodes = frozenset(self.node_secondary_ip)
9488       assert ((self.early_release and not owned_locks) or
9489               (not self.early_release and not (set(owned_locks) - nodes))), \
9490         ("Not owning the correct locks, early_release=%s, owned=%r,"
9491          " nodes=%r" % (self.early_release, owned_locks, nodes))
9492
9493     return result
9494
9495   def _CheckVolumeGroup(self, nodes):
9496     self.lu.LogInfo("Checking volume groups")
9497
9498     vgname = self.cfg.GetVGName()
9499
9500     # Make sure volume group exists on all involved nodes
9501     results = self.rpc.call_vg_list(nodes)
9502     if not results:
9503       raise errors.OpExecError("Can't list volume groups on the nodes")
9504
9505     for node in nodes:
9506       res = results[node]
9507       res.Raise("Error checking node %s" % node)
9508       if vgname not in res.payload:
9509         raise errors.OpExecError("Volume group '%s' not found on node %s" %
9510                                  (vgname, node))
9511
9512   def _CheckDisksExistence(self, nodes):
9513     # Check disk existence
9514     for idx, dev in enumerate(self.instance.disks):
9515       if idx not in self.disks:
9516         continue
9517
9518       for node in nodes:
9519         self.lu.LogInfo("Checking disk/%d on %s" % (idx, node))
9520         self.cfg.SetDiskID(dev, node)
9521
9522         result = self.rpc.call_blockdev_find(node, dev)
9523
9524         msg = result.fail_msg
9525         if msg or not result.payload:
9526           if not msg:
9527             msg = "disk not found"
9528           raise errors.OpExecError("Can't find disk/%d on node %s: %s" %
9529                                    (idx, node, msg))
9530
9531   def _CheckDisksConsistency(self, node_name, on_primary, ldisk):
9532     for idx, dev in enumerate(self.instance.disks):
9533       if idx not in self.disks:
9534         continue
9535
9536       self.lu.LogInfo("Checking disk/%d consistency on node %s" %
9537                       (idx, node_name))
9538
9539       if not _CheckDiskConsistency(self.lu, dev, node_name, on_primary,
9540                                    ldisk=ldisk):
9541         raise errors.OpExecError("Node %s has degraded storage, unsafe to"
9542                                  " replace disks for instance %s" %
9543                                  (node_name, self.instance.name))
9544
9545   def _CreateNewStorage(self, node_name):
9546     """Create new storage on the primary or secondary node.
9547
9548     This is only used for same-node replaces, not for changing the
9549     secondary node, hence we don't want to modify the existing disk.
9550
9551     """
9552     iv_names = {}
9553
9554     for idx, dev in enumerate(self.instance.disks):
9555       if idx not in self.disks:
9556         continue
9557
9558       self.lu.LogInfo("Adding storage on %s for disk/%d" % (node_name, idx))
9559
9560       self.cfg.SetDiskID(dev, node_name)
9561
9562       lv_names = [".disk%d_%s" % (idx, suffix) for suffix in ["data", "meta"]]
9563       names = _GenerateUniqueNames(self.lu, lv_names)
9564
9565       vg_data = dev.children[0].logical_id[0]
9566       lv_data = objects.Disk(dev_type=constants.LD_LV, size=dev.size,
9567                              logical_id=(vg_data, names[0]))
9568       vg_meta = dev.children[1].logical_id[0]
9569       lv_meta = objects.Disk(dev_type=constants.LD_LV, size=128,
9570                              logical_id=(vg_meta, names[1]))
9571
9572       new_lvs = [lv_data, lv_meta]
9573       old_lvs = [child.Copy() for child in dev.children]
9574       iv_names[dev.iv_name] = (dev, old_lvs, new_lvs)
9575
9576       # we pass force_create=True to force the LVM creation
9577       for new_lv in new_lvs:
9578         _CreateBlockDev(self.lu, node_name, self.instance, new_lv, True,
9579                         _GetInstanceInfoText(self.instance), False)
9580
9581     return iv_names
9582
9583   def _CheckDevices(self, node_name, iv_names):
9584     for name, (dev, _, _) in iv_names.iteritems():
9585       self.cfg.SetDiskID(dev, node_name)
9586
9587       result = self.rpc.call_blockdev_find(node_name, dev)
9588
9589       msg = result.fail_msg
9590       if msg or not result.payload:
9591         if not msg:
9592           msg = "disk not found"
9593         raise errors.OpExecError("Can't find DRBD device %s: %s" %
9594                                  (name, msg))
9595
9596       if result.payload.is_degraded:
9597         raise errors.OpExecError("DRBD device %s is degraded!" % name)
9598
9599   def _RemoveOldStorage(self, node_name, iv_names):
9600     for name, (_, old_lvs, _) in iv_names.iteritems():
9601       self.lu.LogInfo("Remove logical volumes for %s" % name)
9602
9603       for lv in old_lvs:
9604         self.cfg.SetDiskID(lv, node_name)
9605
9606         msg = self.rpc.call_blockdev_remove(node_name, lv).fail_msg
9607         if msg:
9608           self.lu.LogWarning("Can't remove old LV: %s" % msg,
9609                              hint="remove unused LVs manually")
9610
9611   def _ExecDrbd8DiskOnly(self, feedback_fn): # pylint: disable-msg=W0613
9612     """Replace a disk on the primary or secondary for DRBD 8.
9613
9614     The algorithm for replace is quite complicated:
9615
9616       1. for each disk to be replaced:
9617
9618         1. create new LVs on the target node with unique names
9619         1. detach old LVs from the drbd device
9620         1. rename old LVs to name_replaced.<time_t>
9621         1. rename new LVs to old LVs
9622         1. attach the new LVs (with the old names now) to the drbd device
9623
9624       1. wait for sync across all devices
9625
9626       1. for each modified disk:
9627
9628         1. remove old LVs (which have the name name_replaces.<time_t>)
9629
9630     Failures are not very well handled.
9631
9632     """
9633     steps_total = 6
9634
9635     # Step: check device activation
9636     self.lu.LogStep(1, steps_total, "Check device existence")
9637     self._CheckDisksExistence([self.other_node, self.target_node])
9638     self._CheckVolumeGroup([self.target_node, self.other_node])
9639
9640     # Step: check other node consistency
9641     self.lu.LogStep(2, steps_total, "Check peer consistency")
9642     self._CheckDisksConsistency(self.other_node,
9643                                 self.other_node == self.instance.primary_node,
9644                                 False)
9645
9646     # Step: create new storage
9647     self.lu.LogStep(3, steps_total, "Allocate new storage")
9648     iv_names = self._CreateNewStorage(self.target_node)
9649
9650     # Step: for each lv, detach+rename*2+attach
9651     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9652     for dev, old_lvs, new_lvs in iv_names.itervalues():
9653       self.lu.LogInfo("Detaching %s drbd from local storage" % dev.iv_name)
9654
9655       result = self.rpc.call_blockdev_removechildren(self.target_node, dev,
9656                                                      old_lvs)
9657       result.Raise("Can't detach drbd from local storage on node"
9658                    " %s for device %s" % (self.target_node, dev.iv_name))
9659       #dev.children = []
9660       #cfg.Update(instance)
9661
9662       # ok, we created the new LVs, so now we know we have the needed
9663       # storage; as such, we proceed on the target node to rename
9664       # old_lv to _old, and new_lv to old_lv; note that we rename LVs
9665       # using the assumption that logical_id == physical_id (which in
9666       # turn is the unique_id on that node)
9667
9668       # FIXME(iustin): use a better name for the replaced LVs
9669       temp_suffix = int(time.time())
9670       ren_fn = lambda d, suff: (d.physical_id[0],
9671                                 d.physical_id[1] + "_replaced-%s" % suff)
9672
9673       # Build the rename list based on what LVs exist on the node
9674       rename_old_to_new = []
9675       for to_ren in old_lvs:
9676         result = self.rpc.call_blockdev_find(self.target_node, to_ren)
9677         if not result.fail_msg and result.payload:
9678           # device exists
9679           rename_old_to_new.append((to_ren, ren_fn(to_ren, temp_suffix)))
9680
9681       self.lu.LogInfo("Renaming the old LVs on the target node")
9682       result = self.rpc.call_blockdev_rename(self.target_node,
9683                                              rename_old_to_new)
9684       result.Raise("Can't rename old LVs on node %s" % self.target_node)
9685
9686       # Now we rename the new LVs to the old LVs
9687       self.lu.LogInfo("Renaming the new LVs on the target node")
9688       rename_new_to_old = [(new, old.physical_id)
9689                            for old, new in zip(old_lvs, new_lvs)]
9690       result = self.rpc.call_blockdev_rename(self.target_node,
9691                                              rename_new_to_old)
9692       result.Raise("Can't rename new LVs on node %s" % self.target_node)
9693
9694       # Intermediate steps of in memory modifications
9695       for old, new in zip(old_lvs, new_lvs):
9696         new.logical_id = old.logical_id
9697         self.cfg.SetDiskID(new, self.target_node)
9698
9699       # We need to modify old_lvs so that removal later removes the
9700       # right LVs, not the newly added ones; note that old_lvs is a
9701       # copy here
9702       for disk in old_lvs:
9703         disk.logical_id = ren_fn(disk, temp_suffix)
9704         self.cfg.SetDiskID(disk, self.target_node)
9705
9706       # Now that the new lvs have the old name, we can add them to the device
9707       self.lu.LogInfo("Adding new mirror component on %s" % self.target_node)
9708       result = self.rpc.call_blockdev_addchildren(self.target_node, dev,
9709                                                   new_lvs)
9710       msg = result.fail_msg
9711       if msg:
9712         for new_lv in new_lvs:
9713           msg2 = self.rpc.call_blockdev_remove(self.target_node,
9714                                                new_lv).fail_msg
9715           if msg2:
9716             self.lu.LogWarning("Can't rollback device %s: %s", dev, msg2,
9717                                hint=("cleanup manually the unused logical"
9718                                      "volumes"))
9719         raise errors.OpExecError("Can't add local storage to drbd: %s" % msg)
9720
9721     cstep = 5
9722     if self.early_release:
9723       self.lu.LogStep(cstep, steps_total, "Removing old storage")
9724       cstep += 1
9725       self._RemoveOldStorage(self.target_node, iv_names)
9726       # WARNING: we release both node locks here, do not do other RPCs
9727       # than WaitForSync to the primary node
9728       _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9729                     names=[self.target_node, self.other_node])
9730
9731     # Wait for sync
9732     # This can fail as the old devices are degraded and _WaitForSync
9733     # does a combined result over all disks, so we don't check its return value
9734     self.lu.LogStep(cstep, steps_total, "Sync devices")
9735     cstep += 1
9736     _WaitForSync(self.lu, self.instance)
9737
9738     # Check all devices manually
9739     self._CheckDevices(self.instance.primary_node, iv_names)
9740
9741     # Step: remove old storage
9742     if not self.early_release:
9743       self.lu.LogStep(cstep, steps_total, "Removing old storage")
9744       cstep += 1
9745       self._RemoveOldStorage(self.target_node, iv_names)
9746
9747   def _ExecDrbd8Secondary(self, feedback_fn):
9748     """Replace the secondary node for DRBD 8.
9749
9750     The algorithm for replace is quite complicated:
9751       - for all disks of the instance:
9752         - create new LVs on the new node with same names
9753         - shutdown the drbd device on the old secondary
9754         - disconnect the drbd network on the primary
9755         - create the drbd device on the new secondary
9756         - network attach the drbd on the primary, using an artifice:
9757           the drbd code for Attach() will connect to the network if it
9758           finds a device which is connected to the good local disks but
9759           not network enabled
9760       - wait for sync across all devices
9761       - remove all disks from the old secondary
9762
9763     Failures are not very well handled.
9764
9765     """
9766     steps_total = 6
9767
9768     # Step: check device activation
9769     self.lu.LogStep(1, steps_total, "Check device existence")
9770     self._CheckDisksExistence([self.instance.primary_node])
9771     self._CheckVolumeGroup([self.instance.primary_node])
9772
9773     # Step: check other node consistency
9774     self.lu.LogStep(2, steps_total, "Check peer consistency")
9775     self._CheckDisksConsistency(self.instance.primary_node, True, True)
9776
9777     # Step: create new storage
9778     self.lu.LogStep(3, steps_total, "Allocate new storage")
9779     for idx, dev in enumerate(self.instance.disks):
9780       self.lu.LogInfo("Adding new local storage on %s for disk/%d" %
9781                       (self.new_node, idx))
9782       # we pass force_create=True to force LVM creation
9783       for new_lv in dev.children:
9784         _CreateBlockDev(self.lu, self.new_node, self.instance, new_lv, True,
9785                         _GetInstanceInfoText(self.instance), False)
9786
9787     # Step 4: dbrd minors and drbd setups changes
9788     # after this, we must manually remove the drbd minors on both the
9789     # error and the success paths
9790     self.lu.LogStep(4, steps_total, "Changing drbd configuration")
9791     minors = self.cfg.AllocateDRBDMinor([self.new_node
9792                                          for dev in self.instance.disks],
9793                                         self.instance.name)
9794     logging.debug("Allocated minors %r", minors)
9795
9796     iv_names = {}
9797     for idx, (dev, new_minor) in enumerate(zip(self.instance.disks, minors)):
9798       self.lu.LogInfo("activating a new drbd on %s for disk/%d" %
9799                       (self.new_node, idx))
9800       # create new devices on new_node; note that we create two IDs:
9801       # one without port, so the drbd will be activated without
9802       # networking information on the new node at this stage, and one
9803       # with network, for the latter activation in step 4
9804       (o_node1, o_node2, o_port, o_minor1, o_minor2, o_secret) = dev.logical_id
9805       if self.instance.primary_node == o_node1:
9806         p_minor = o_minor1
9807       else:
9808         assert self.instance.primary_node == o_node2, "Three-node instance?"
9809         p_minor = o_minor2
9810
9811       new_alone_id = (self.instance.primary_node, self.new_node, None,
9812                       p_minor, new_minor, o_secret)
9813       new_net_id = (self.instance.primary_node, self.new_node, o_port,
9814                     p_minor, new_minor, o_secret)
9815
9816       iv_names[idx] = (dev, dev.children, new_net_id)
9817       logging.debug("Allocated new_minor: %s, new_logical_id: %s", new_minor,
9818                     new_net_id)
9819       new_drbd = objects.Disk(dev_type=constants.LD_DRBD8,
9820                               logical_id=new_alone_id,
9821                               children=dev.children,
9822                               size=dev.size)
9823       try:
9824         _CreateSingleBlockDev(self.lu, self.new_node, self.instance, new_drbd,
9825                               _GetInstanceInfoText(self.instance), False)
9826       except errors.GenericError:
9827         self.cfg.ReleaseDRBDMinors(self.instance.name)
9828         raise
9829
9830     # We have new devices, shutdown the drbd on the old secondary
9831     for idx, dev in enumerate(self.instance.disks):
9832       self.lu.LogInfo("Shutting down drbd for disk/%d on old node" % idx)
9833       self.cfg.SetDiskID(dev, self.target_node)
9834       msg = self.rpc.call_blockdev_shutdown(self.target_node, dev).fail_msg
9835       if msg:
9836         self.lu.LogWarning("Failed to shutdown drbd for disk/%d on old"
9837                            "node: %s" % (idx, msg),
9838                            hint=("Please cleanup this device manually as"
9839                                  " soon as possible"))
9840
9841     self.lu.LogInfo("Detaching primary drbds from the network (=> standalone)")
9842     result = self.rpc.call_drbd_disconnect_net([self.instance.primary_node],
9843                                                self.node_secondary_ip,
9844                                                self.instance.disks)\
9845                                               [self.instance.primary_node]
9846
9847     msg = result.fail_msg
9848     if msg:
9849       # detaches didn't succeed (unlikely)
9850       self.cfg.ReleaseDRBDMinors(self.instance.name)
9851       raise errors.OpExecError("Can't detach the disks from the network on"
9852                                " old node: %s" % (msg,))
9853
9854     # if we managed to detach at least one, we update all the disks of
9855     # the instance to point to the new secondary
9856     self.lu.LogInfo("Updating instance configuration")
9857     for dev, _, new_logical_id in iv_names.itervalues():
9858       dev.logical_id = new_logical_id
9859       self.cfg.SetDiskID(dev, self.instance.primary_node)
9860
9861     self.cfg.Update(self.instance, feedback_fn)
9862
9863     # and now perform the drbd attach
9864     self.lu.LogInfo("Attaching primary drbds to new secondary"
9865                     " (standalone => connected)")
9866     result = self.rpc.call_drbd_attach_net([self.instance.primary_node,
9867                                             self.new_node],
9868                                            self.node_secondary_ip,
9869                                            self.instance.disks,
9870                                            self.instance.name,
9871                                            False)
9872     for to_node, to_result in result.items():
9873       msg = to_result.fail_msg
9874       if msg:
9875         self.lu.LogWarning("Can't attach drbd disks on node %s: %s",
9876                            to_node, msg,
9877                            hint=("please do a gnt-instance info to see the"
9878                                  " status of disks"))
9879     cstep = 5
9880     if self.early_release:
9881       self.lu.LogStep(cstep, steps_total, "Removing old storage")
9882       cstep += 1
9883       self._RemoveOldStorage(self.target_node, iv_names)
9884       # WARNING: we release all node locks here, do not do other RPCs
9885       # than WaitForSync to the primary node
9886       _ReleaseLocks(self.lu, locking.LEVEL_NODE,
9887                     names=[self.instance.primary_node,
9888                            self.target_node,
9889                            self.new_node])
9890
9891     # Wait for sync
9892     # This can fail as the old devices are degraded and _WaitForSync
9893     # does a combined result over all disks, so we don't check its return value
9894     self.lu.LogStep(cstep, steps_total, "Sync devices")
9895     cstep += 1
9896     _WaitForSync(self.lu, self.instance)
9897
9898     # Check all devices manually
9899     self._CheckDevices(self.instance.primary_node, iv_names)
9900
9901     # Step: remove old storage
9902     if not self.early_release:
9903       self.lu.LogStep(cstep, steps_total, "Removing old storage")
9904       self._RemoveOldStorage(self.target_node, iv_names)
9905
9906
9907 class LURepairNodeStorage(NoHooksLU):
9908   """Repairs the volume group on a node.
9909
9910   """
9911   REQ_BGL = False
9912
9913   def CheckArguments(self):
9914     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9915
9916     storage_type = self.op.storage_type
9917
9918     if (constants.SO_FIX_CONSISTENCY not in
9919         constants.VALID_STORAGE_OPERATIONS.get(storage_type, [])):
9920       raise errors.OpPrereqError("Storage units of type '%s' can not be"
9921                                  " repaired" % storage_type,
9922                                  errors.ECODE_INVAL)
9923
9924   def ExpandNames(self):
9925     self.needed_locks = {
9926       locking.LEVEL_NODE: [self.op.node_name],
9927       }
9928
9929   def _CheckFaultyDisks(self, instance, node_name):
9930     """Ensure faulty disks abort the opcode or at least warn."""
9931     try:
9932       if _FindFaultyInstanceDisks(self.cfg, self.rpc, instance,
9933                                   node_name, True):
9934         raise errors.OpPrereqError("Instance '%s' has faulty disks on"
9935                                    " node '%s'" % (instance.name, node_name),
9936                                    errors.ECODE_STATE)
9937     except errors.OpPrereqError, err:
9938       if self.op.ignore_consistency:
9939         self.proc.LogWarning(str(err.args[0]))
9940       else:
9941         raise
9942
9943   def CheckPrereq(self):
9944     """Check prerequisites.
9945
9946     """
9947     # Check whether any instance on this node has faulty disks
9948     for inst in _GetNodeInstances(self.cfg, self.op.node_name):
9949       if not inst.admin_up:
9950         continue
9951       check_nodes = set(inst.all_nodes)
9952       check_nodes.discard(self.op.node_name)
9953       for inst_node_name in check_nodes:
9954         self._CheckFaultyDisks(inst, inst_node_name)
9955
9956   def Exec(self, feedback_fn):
9957     feedback_fn("Repairing storage unit '%s' on %s ..." %
9958                 (self.op.name, self.op.node_name))
9959
9960     st_args = _GetStorageTypeArgs(self.cfg, self.op.storage_type)
9961     result = self.rpc.call_storage_execute(self.op.node_name,
9962                                            self.op.storage_type, st_args,
9963                                            self.op.name,
9964                                            constants.SO_FIX_CONSISTENCY)
9965     result.Raise("Failed to repair storage unit '%s' on %s" %
9966                  (self.op.name, self.op.node_name))
9967
9968
9969 class LUNodeEvacuate(NoHooksLU):
9970   """Evacuates instances off a list of nodes.
9971
9972   """
9973   REQ_BGL = False
9974
9975   def CheckArguments(self):
9976     _CheckIAllocatorOrNode(self, "iallocator", "remote_node")
9977
9978   def ExpandNames(self):
9979     self.op.node_name = _ExpandNodeName(self.cfg, self.op.node_name)
9980
9981     if self.op.remote_node is not None:
9982       self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
9983       assert self.op.remote_node
9984
9985       if self.op.remote_node == self.op.node_name:
9986         raise errors.OpPrereqError("Can not use evacuated node as a new"
9987                                    " secondary node", errors.ECODE_INVAL)
9988
9989       if self.op.mode != constants.IALLOCATOR_NEVAC_SEC:
9990         raise errors.OpPrereqError("Without the use of an iallocator only"
9991                                    " secondary instances can be evacuated",
9992                                    errors.ECODE_INVAL)
9993
9994     # Declare locks
9995     self.share_locks = _ShareAll()
9996     self.needed_locks = {
9997       locking.LEVEL_INSTANCE: [],
9998       locking.LEVEL_NODEGROUP: [],
9999       locking.LEVEL_NODE: [],
10000       }
10001
10002     if self.op.remote_node is None:
10003       # Iallocator will choose any node(s) in the same group
10004       group_nodes = self.cfg.GetNodeGroupMembersByNodes([self.op.node_name])
10005     else:
10006       group_nodes = frozenset([self.op.remote_node])
10007
10008     # Determine nodes to be locked
10009     self.lock_nodes = set([self.op.node_name]) | group_nodes
10010
10011   def _DetermineInstances(self):
10012     """Builds list of instances to operate on.
10013
10014     """
10015     assert self.op.mode in constants.IALLOCATOR_NEVAC_MODES
10016
10017     if self.op.mode == constants.IALLOCATOR_NEVAC_PRI:
10018       # Primary instances only
10019       inst_fn = _GetNodePrimaryInstances
10020       assert self.op.remote_node is None, \
10021         "Evacuating primary instances requires iallocator"
10022     elif self.op.mode == constants.IALLOCATOR_NEVAC_SEC:
10023       # Secondary instances only
10024       inst_fn = _GetNodeSecondaryInstances
10025     else:
10026       # All instances
10027       assert self.op.mode == constants.IALLOCATOR_NEVAC_ALL
10028       inst_fn = _GetNodeInstances
10029
10030     return inst_fn(self.cfg, self.op.node_name)
10031
10032   def DeclareLocks(self, level):
10033     if level == locking.LEVEL_INSTANCE:
10034       # Lock instances optimistically, needs verification once node and group
10035       # locks have been acquired
10036       self.needed_locks[locking.LEVEL_INSTANCE] = \
10037         set(i.name for i in self._DetermineInstances())
10038
10039     elif level == locking.LEVEL_NODEGROUP:
10040       # Lock node groups optimistically, needs verification once nodes have
10041       # been acquired
10042       self.needed_locks[locking.LEVEL_NODEGROUP] = \
10043         self.cfg.GetNodeGroupsFromNodes(self.lock_nodes)
10044
10045     elif level == locking.LEVEL_NODE:
10046       self.needed_locks[locking.LEVEL_NODE] = self.lock_nodes
10047
10048   def CheckPrereq(self):
10049     # Verify locks
10050     owned_instances = self.glm.list_owned(locking.LEVEL_INSTANCE)
10051     owned_nodes = self.glm.list_owned(locking.LEVEL_NODE)
10052     owned_groups = self.glm.list_owned(locking.LEVEL_NODEGROUP)
10053
10054     assert owned_nodes == self.lock_nodes
10055
10056     wanted_groups = self.cfg.GetNodeGroupsFromNodes(owned_nodes)
10057     if owned_groups != wanted_groups:
10058       raise errors.OpExecError("Node groups changed since locks were acquired,"
10059                                " current groups are '%s', used to be '%s'" %
10060                                (utils.CommaJoin(wanted_groups),
10061                                 utils.CommaJoin(owned_groups)))
10062
10063     # Determine affected instances
10064     self.instances = self._DetermineInstances()
10065     self.instance_names = [i.name for i in self.instances]
10066
10067     if set(self.instance_names) != owned_instances:
10068       raise errors.OpExecError("Instances on node '%s' changed since locks"
10069                                " were acquired, current instances are '%s',"
10070                                " used to be '%s'" %
10071                                (self.op.node_name,
10072                                 utils.CommaJoin(self.instance_names),
10073                                 utils.CommaJoin(owned_instances)))
10074
10075     if self.instance_names:
10076       self.LogInfo("Evacuating instances from node '%s': %s",
10077                    self.op.node_name,
10078                    utils.CommaJoin(utils.NiceSort(self.instance_names)))
10079     else:
10080       self.LogInfo("No instances to evacuate from node '%s'",
10081                    self.op.node_name)
10082
10083     if self.op.remote_node is not None:
10084       for i in self.instances:
10085         if i.primary_node == self.op.remote_node:
10086           raise errors.OpPrereqError("Node %s is the primary node of"
10087                                      " instance %s, cannot use it as"
10088                                      " secondary" %
10089                                      (self.op.remote_node, i.name),
10090                                      errors.ECODE_INVAL)
10091
10092   def Exec(self, feedback_fn):
10093     assert (self.op.iallocator is not None) ^ (self.op.remote_node is not None)
10094
10095     if not self.instance_names:
10096       # No instances to evacuate
10097       jobs = []
10098
10099     elif self.op.iallocator is not None:
10100       # TODO: Implement relocation to other group
10101       ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_NODE_EVAC,
10102                        evac_mode=self.op.mode,
10103                        instances=list(self.instance_names))
10104
10105       ial.Run(self.op.iallocator)
10106
10107       if not ial.success:
10108         raise errors.OpPrereqError("Can't compute node evacuation using"
10109                                    " iallocator '%s': %s" %
10110                                    (self.op.iallocator, ial.info),
10111                                    errors.ECODE_NORES)
10112
10113       jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, True)
10114
10115     elif self.op.remote_node is not None:
10116       assert self.op.mode == constants.IALLOCATOR_NEVAC_SEC
10117       jobs = [
10118         [opcodes.OpInstanceReplaceDisks(instance_name=instance_name,
10119                                         remote_node=self.op.remote_node,
10120                                         disks=[],
10121                                         mode=constants.REPLACE_DISK_CHG,
10122                                         early_release=self.op.early_release)]
10123         for instance_name in self.instance_names
10124         ]
10125
10126     else:
10127       raise errors.ProgrammerError("No iallocator or remote node")
10128
10129     return ResultWithJobs(jobs)
10130
10131
10132 def _SetOpEarlyRelease(early_release, op):
10133   """Sets C{early_release} flag on opcodes if available.
10134
10135   """
10136   try:
10137     op.early_release = early_release
10138   except AttributeError:
10139     assert not isinstance(op, opcodes.OpInstanceReplaceDisks)
10140
10141   return op
10142
10143
10144 def _NodeEvacDest(use_nodes, group, nodes):
10145   """Returns group or nodes depending on caller's choice.
10146
10147   """
10148   if use_nodes:
10149     return utils.CommaJoin(nodes)
10150   else:
10151     return group
10152
10153
10154 def _LoadNodeEvacResult(lu, alloc_result, early_release, use_nodes):
10155   """Unpacks the result of change-group and node-evacuate iallocator requests.
10156
10157   Iallocator modes L{constants.IALLOCATOR_MODE_NODE_EVAC} and
10158   L{constants.IALLOCATOR_MODE_CHG_GROUP}.
10159
10160   @type lu: L{LogicalUnit}
10161   @param lu: Logical unit instance
10162   @type alloc_result: tuple/list
10163   @param alloc_result: Result from iallocator
10164   @type early_release: bool
10165   @param early_release: Whether to release locks early if possible
10166   @type use_nodes: bool
10167   @param use_nodes: Whether to display node names instead of groups
10168
10169   """
10170   (moved, failed, jobs) = alloc_result
10171
10172   if failed:
10173     lu.LogWarning("Unable to evacuate instances %s",
10174                   utils.CommaJoin("%s (%s)" % (name, reason)
10175                                   for (name, reason) in failed))
10176
10177   if moved:
10178     lu.LogInfo("Instances to be moved: %s",
10179                utils.CommaJoin("%s (to %s)" %
10180                                (name, _NodeEvacDest(use_nodes, group, nodes))
10181                                for (name, group, nodes) in moved))
10182
10183   return [map(compat.partial(_SetOpEarlyRelease, early_release),
10184               map(opcodes.OpCode.LoadOpCode, ops))
10185           for ops in jobs]
10186
10187
10188 class LUInstanceGrowDisk(LogicalUnit):
10189   """Grow a disk of an instance.
10190
10191   """
10192   HPATH = "disk-grow"
10193   HTYPE = constants.HTYPE_INSTANCE
10194   REQ_BGL = False
10195
10196   def ExpandNames(self):
10197     self._ExpandAndLockInstance()
10198     self.needed_locks[locking.LEVEL_NODE] = []
10199     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10200
10201   def DeclareLocks(self, level):
10202     if level == locking.LEVEL_NODE:
10203       self._LockInstancesNodes()
10204
10205   def BuildHooksEnv(self):
10206     """Build hooks env.
10207
10208     This runs on the master, the primary and all the secondaries.
10209
10210     """
10211     env = {
10212       "DISK": self.op.disk,
10213       "AMOUNT": self.op.amount,
10214       }
10215     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
10216     return env
10217
10218   def BuildHooksNodes(self):
10219     """Build hooks nodes.
10220
10221     """
10222     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10223     return (nl, nl)
10224
10225   def CheckPrereq(self):
10226     """Check prerequisites.
10227
10228     This checks that the instance is in the cluster.
10229
10230     """
10231     instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10232     assert instance is not None, \
10233       "Cannot retrieve locked instance %s" % self.op.instance_name
10234     nodenames = list(instance.all_nodes)
10235     for node in nodenames:
10236       _CheckNodeOnline(self, node)
10237
10238     self.instance = instance
10239
10240     if instance.disk_template not in constants.DTS_GROWABLE:
10241       raise errors.OpPrereqError("Instance's disk layout does not support"
10242                                  " growing", errors.ECODE_INVAL)
10243
10244     self.disk = instance.FindDisk(self.op.disk)
10245
10246     if instance.disk_template not in (constants.DT_FILE,
10247                                       constants.DT_SHARED_FILE):
10248       # TODO: check the free disk space for file, when that feature will be
10249       # supported
10250       _CheckNodesFreeDiskPerVG(self, nodenames,
10251                                self.disk.ComputeGrowth(self.op.amount))
10252
10253   def Exec(self, feedback_fn):
10254     """Execute disk grow.
10255
10256     """
10257     instance = self.instance
10258     disk = self.disk
10259
10260     disks_ok, _ = _AssembleInstanceDisks(self, self.instance, disks=[disk])
10261     if not disks_ok:
10262       raise errors.OpExecError("Cannot activate block device to grow")
10263
10264     # First run all grow ops in dry-run mode
10265     for node in instance.all_nodes:
10266       self.cfg.SetDiskID(disk, node)
10267       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, True)
10268       result.Raise("Grow request failed to node %s" % node)
10269
10270     # We know that (as far as we can test) operations across different
10271     # nodes will succeed, time to run it for real
10272     for node in instance.all_nodes:
10273       self.cfg.SetDiskID(disk, node)
10274       result = self.rpc.call_blockdev_grow(node, disk, self.op.amount, False)
10275       result.Raise("Grow request failed to node %s" % node)
10276
10277       # TODO: Rewrite code to work properly
10278       # DRBD goes into sync mode for a short amount of time after executing the
10279       # "resize" command. DRBD 8.x below version 8.0.13 contains a bug whereby
10280       # calling "resize" in sync mode fails. Sleeping for a short amount of
10281       # time is a work-around.
10282       time.sleep(5)
10283
10284     disk.RecordGrow(self.op.amount)
10285     self.cfg.Update(instance, feedback_fn)
10286     if self.op.wait_for_sync:
10287       disk_abort = not _WaitForSync(self, instance, disks=[disk])
10288       if disk_abort:
10289         self.proc.LogWarning("Disk sync-ing has not returned a good"
10290                              " status; please check the instance")
10291       if not instance.admin_up:
10292         _SafeShutdownInstanceDisks(self, instance, disks=[disk])
10293     elif not instance.admin_up:
10294       self.proc.LogWarning("Not shutting down the disk even if the instance is"
10295                            " not supposed to be running because no wait for"
10296                            " sync mode was requested")
10297
10298
10299 class LUInstanceQueryData(NoHooksLU):
10300   """Query runtime instance data.
10301
10302   """
10303   REQ_BGL = False
10304
10305   def ExpandNames(self):
10306     self.needed_locks = {}
10307
10308     # Use locking if requested or when non-static information is wanted
10309     if not (self.op.static or self.op.use_locking):
10310       self.LogWarning("Non-static data requested, locks need to be acquired")
10311       self.op.use_locking = True
10312
10313     if self.op.instances or not self.op.use_locking:
10314       # Expand instance names right here
10315       self.wanted_names = _GetWantedInstances(self, self.op.instances)
10316     else:
10317       # Will use acquired locks
10318       self.wanted_names = None
10319
10320     if self.op.use_locking:
10321       self.share_locks = _ShareAll()
10322
10323       if self.wanted_names is None:
10324         self.needed_locks[locking.LEVEL_INSTANCE] = locking.ALL_SET
10325       else:
10326         self.needed_locks[locking.LEVEL_INSTANCE] = self.wanted_names
10327
10328       self.needed_locks[locking.LEVEL_NODE] = []
10329       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10330
10331   def DeclareLocks(self, level):
10332     if self.op.use_locking and level == locking.LEVEL_NODE:
10333       self._LockInstancesNodes()
10334
10335   def CheckPrereq(self):
10336     """Check prerequisites.
10337
10338     This only checks the optional instance list against the existing names.
10339
10340     """
10341     if self.wanted_names is None:
10342       assert self.op.use_locking, "Locking was not used"
10343       self.wanted_names = self.glm.list_owned(locking.LEVEL_INSTANCE)
10344
10345     self.wanted_instances = \
10346         map(compat.snd, self.cfg.GetMultiInstanceInfo(self.wanted_names))
10347
10348   def _ComputeBlockdevStatus(self, node, instance_name, dev):
10349     """Returns the status of a block device
10350
10351     """
10352     if self.op.static or not node:
10353       return None
10354
10355     self.cfg.SetDiskID(dev, node)
10356
10357     result = self.rpc.call_blockdev_find(node, dev)
10358     if result.offline:
10359       return None
10360
10361     result.Raise("Can't compute disk status for %s" % instance_name)
10362
10363     status = result.payload
10364     if status is None:
10365       return None
10366
10367     return (status.dev_path, status.major, status.minor,
10368             status.sync_percent, status.estimated_time,
10369             status.is_degraded, status.ldisk_status)
10370
10371   def _ComputeDiskStatus(self, instance, snode, dev):
10372     """Compute block device status.
10373
10374     """
10375     if dev.dev_type in constants.LDS_DRBD:
10376       # we change the snode then (otherwise we use the one passed in)
10377       if dev.logical_id[0] == instance.primary_node:
10378         snode = dev.logical_id[1]
10379       else:
10380         snode = dev.logical_id[0]
10381
10382     dev_pstatus = self._ComputeBlockdevStatus(instance.primary_node,
10383                                               instance.name, dev)
10384     dev_sstatus = self._ComputeBlockdevStatus(snode, instance.name, dev)
10385
10386     if dev.children:
10387       dev_children = map(compat.partial(self._ComputeDiskStatus,
10388                                         instance, snode),
10389                          dev.children)
10390     else:
10391       dev_children = []
10392
10393     return {
10394       "iv_name": dev.iv_name,
10395       "dev_type": dev.dev_type,
10396       "logical_id": dev.logical_id,
10397       "physical_id": dev.physical_id,
10398       "pstatus": dev_pstatus,
10399       "sstatus": dev_sstatus,
10400       "children": dev_children,
10401       "mode": dev.mode,
10402       "size": dev.size,
10403       }
10404
10405   def Exec(self, feedback_fn):
10406     """Gather and return data"""
10407     result = {}
10408
10409     cluster = self.cfg.GetClusterInfo()
10410
10411     pri_nodes = self.cfg.GetMultiNodeInfo(i.primary_node
10412                                           for i in self.wanted_instances)
10413     for instance, (_, pnode) in zip(self.wanted_instances, pri_nodes):
10414       if self.op.static or pnode.offline:
10415         remote_state = None
10416         if pnode.offline:
10417           self.LogWarning("Primary node %s is marked offline, returning static"
10418                           " information only for instance %s" %
10419                           (pnode.name, instance.name))
10420       else:
10421         remote_info = self.rpc.call_instance_info(instance.primary_node,
10422                                                   instance.name,
10423                                                   instance.hypervisor)
10424         remote_info.Raise("Error checking node %s" % instance.primary_node)
10425         remote_info = remote_info.payload
10426         if remote_info and "state" in remote_info:
10427           remote_state = "up"
10428         else:
10429           remote_state = "down"
10430
10431       if instance.admin_up:
10432         config_state = "up"
10433       else:
10434         config_state = "down"
10435
10436       disks = map(compat.partial(self._ComputeDiskStatus, instance, None),
10437                   instance.disks)
10438
10439       result[instance.name] = {
10440         "name": instance.name,
10441         "config_state": config_state,
10442         "run_state": remote_state,
10443         "pnode": instance.primary_node,
10444         "snodes": instance.secondary_nodes,
10445         "os": instance.os,
10446         # this happens to be the same format used for hooks
10447         "nics": _NICListToTuple(self, instance.nics),
10448         "disk_template": instance.disk_template,
10449         "disks": disks,
10450         "hypervisor": instance.hypervisor,
10451         "network_port": instance.network_port,
10452         "hv_instance": instance.hvparams,
10453         "hv_actual": cluster.FillHV(instance, skip_globals=True),
10454         "be_instance": instance.beparams,
10455         "be_actual": cluster.FillBE(instance),
10456         "os_instance": instance.osparams,
10457         "os_actual": cluster.SimpleFillOS(instance.os, instance.osparams),
10458         "serial_no": instance.serial_no,
10459         "mtime": instance.mtime,
10460         "ctime": instance.ctime,
10461         "uuid": instance.uuid,
10462         }
10463
10464     return result
10465
10466
10467 class LUInstanceSetParams(LogicalUnit):
10468   """Modifies an instances's parameters.
10469
10470   """
10471   HPATH = "instance-modify"
10472   HTYPE = constants.HTYPE_INSTANCE
10473   REQ_BGL = False
10474
10475   def CheckArguments(self):
10476     if not (self.op.nics or self.op.disks or self.op.disk_template or
10477             self.op.hvparams or self.op.beparams or self.op.os_name):
10478       raise errors.OpPrereqError("No changes submitted", errors.ECODE_INVAL)
10479
10480     if self.op.hvparams:
10481       _CheckGlobalHvParams(self.op.hvparams)
10482
10483     # Disk validation
10484     disk_addremove = 0
10485     for disk_op, disk_dict in self.op.disks:
10486       utils.ForceDictType(disk_dict, constants.IDISK_PARAMS_TYPES)
10487       if disk_op == constants.DDM_REMOVE:
10488         disk_addremove += 1
10489         continue
10490       elif disk_op == constants.DDM_ADD:
10491         disk_addremove += 1
10492       else:
10493         if not isinstance(disk_op, int):
10494           raise errors.OpPrereqError("Invalid disk index", errors.ECODE_INVAL)
10495         if not isinstance(disk_dict, dict):
10496           msg = "Invalid disk value: expected dict, got '%s'" % disk_dict
10497           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10498
10499       if disk_op == constants.DDM_ADD:
10500         mode = disk_dict.setdefault(constants.IDISK_MODE, constants.DISK_RDWR)
10501         if mode not in constants.DISK_ACCESS_SET:
10502           raise errors.OpPrereqError("Invalid disk access mode '%s'" % mode,
10503                                      errors.ECODE_INVAL)
10504         size = disk_dict.get(constants.IDISK_SIZE, None)
10505         if size is None:
10506           raise errors.OpPrereqError("Required disk parameter size missing",
10507                                      errors.ECODE_INVAL)
10508         try:
10509           size = int(size)
10510         except (TypeError, ValueError), err:
10511           raise errors.OpPrereqError("Invalid disk size parameter: %s" %
10512                                      str(err), errors.ECODE_INVAL)
10513         disk_dict[constants.IDISK_SIZE] = size
10514       else:
10515         # modification of disk
10516         if constants.IDISK_SIZE in disk_dict:
10517           raise errors.OpPrereqError("Disk size change not possible, use"
10518                                      " grow-disk", errors.ECODE_INVAL)
10519
10520     if disk_addremove > 1:
10521       raise errors.OpPrereqError("Only one disk add or remove operation"
10522                                  " supported at a time", errors.ECODE_INVAL)
10523
10524     if self.op.disks and self.op.disk_template is not None:
10525       raise errors.OpPrereqError("Disk template conversion and other disk"
10526                                  " changes not supported at the same time",
10527                                  errors.ECODE_INVAL)
10528
10529     if (self.op.disk_template and
10530         self.op.disk_template in constants.DTS_INT_MIRROR and
10531         self.op.remote_node is None):
10532       raise errors.OpPrereqError("Changing the disk template to a mirrored"
10533                                  " one requires specifying a secondary node",
10534                                  errors.ECODE_INVAL)
10535
10536     # NIC validation
10537     nic_addremove = 0
10538     for nic_op, nic_dict in self.op.nics:
10539       utils.ForceDictType(nic_dict, constants.INIC_PARAMS_TYPES)
10540       if nic_op == constants.DDM_REMOVE:
10541         nic_addremove += 1
10542         continue
10543       elif nic_op == constants.DDM_ADD:
10544         nic_addremove += 1
10545       else:
10546         if not isinstance(nic_op, int):
10547           raise errors.OpPrereqError("Invalid nic index", errors.ECODE_INVAL)
10548         if not isinstance(nic_dict, dict):
10549           msg = "Invalid nic value: expected dict, got '%s'" % nic_dict
10550           raise errors.OpPrereqError(msg, errors.ECODE_INVAL)
10551
10552       # nic_dict should be a dict
10553       nic_ip = nic_dict.get(constants.INIC_IP, None)
10554       if nic_ip is not None:
10555         if nic_ip.lower() == constants.VALUE_NONE:
10556           nic_dict[constants.INIC_IP] = None
10557         else:
10558           if not netutils.IPAddress.IsValid(nic_ip):
10559             raise errors.OpPrereqError("Invalid IP address '%s'" % nic_ip,
10560                                        errors.ECODE_INVAL)
10561
10562       nic_bridge = nic_dict.get("bridge", None)
10563       nic_link = nic_dict.get(constants.INIC_LINK, None)
10564       if nic_bridge and nic_link:
10565         raise errors.OpPrereqError("Cannot pass 'bridge' and 'link'"
10566                                    " at the same time", errors.ECODE_INVAL)
10567       elif nic_bridge and nic_bridge.lower() == constants.VALUE_NONE:
10568         nic_dict["bridge"] = None
10569       elif nic_link and nic_link.lower() == constants.VALUE_NONE:
10570         nic_dict[constants.INIC_LINK] = None
10571
10572       if nic_op == constants.DDM_ADD:
10573         nic_mac = nic_dict.get(constants.INIC_MAC, None)
10574         if nic_mac is None:
10575           nic_dict[constants.INIC_MAC] = constants.VALUE_AUTO
10576
10577       if constants.INIC_MAC in nic_dict:
10578         nic_mac = nic_dict[constants.INIC_MAC]
10579         if nic_mac not in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10580           nic_mac = utils.NormalizeAndValidateMac(nic_mac)
10581
10582         if nic_op != constants.DDM_ADD and nic_mac == constants.VALUE_AUTO:
10583           raise errors.OpPrereqError("'auto' is not a valid MAC address when"
10584                                      " modifying an existing nic",
10585                                      errors.ECODE_INVAL)
10586
10587     if nic_addremove > 1:
10588       raise errors.OpPrereqError("Only one NIC add or remove operation"
10589                                  " supported at a time", errors.ECODE_INVAL)
10590
10591   def ExpandNames(self):
10592     self._ExpandAndLockInstance()
10593     self.needed_locks[locking.LEVEL_NODE] = []
10594     self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_REPLACE
10595
10596   def DeclareLocks(self, level):
10597     if level == locking.LEVEL_NODE:
10598       self._LockInstancesNodes()
10599       if self.op.disk_template and self.op.remote_node:
10600         self.op.remote_node = _ExpandNodeName(self.cfg, self.op.remote_node)
10601         self.needed_locks[locking.LEVEL_NODE].append(self.op.remote_node)
10602
10603   def BuildHooksEnv(self):
10604     """Build hooks env.
10605
10606     This runs on the master, primary and secondaries.
10607
10608     """
10609     args = dict()
10610     if constants.BE_MEMORY in self.be_new:
10611       args["memory"] = self.be_new[constants.BE_MEMORY]
10612     if constants.BE_VCPUS in self.be_new:
10613       args["vcpus"] = self.be_new[constants.BE_VCPUS]
10614     # TODO: export disk changes. Note: _BuildInstanceHookEnv* don't export disk
10615     # information at all.
10616     if self.op.nics:
10617       args["nics"] = []
10618       nic_override = dict(self.op.nics)
10619       for idx, nic in enumerate(self.instance.nics):
10620         if idx in nic_override:
10621           this_nic_override = nic_override[idx]
10622         else:
10623           this_nic_override = {}
10624         if constants.INIC_IP in this_nic_override:
10625           ip = this_nic_override[constants.INIC_IP]
10626         else:
10627           ip = nic.ip
10628         if constants.INIC_MAC in this_nic_override:
10629           mac = this_nic_override[constants.INIC_MAC]
10630         else:
10631           mac = nic.mac
10632         if idx in self.nic_pnew:
10633           nicparams = self.nic_pnew[idx]
10634         else:
10635           nicparams = self.cluster.SimpleFillNIC(nic.nicparams)
10636         mode = nicparams[constants.NIC_MODE]
10637         link = nicparams[constants.NIC_LINK]
10638         args["nics"].append((ip, mac, mode, link))
10639       if constants.DDM_ADD in nic_override:
10640         ip = nic_override[constants.DDM_ADD].get(constants.INIC_IP, None)
10641         mac = nic_override[constants.DDM_ADD][constants.INIC_MAC]
10642         nicparams = self.nic_pnew[constants.DDM_ADD]
10643         mode = nicparams[constants.NIC_MODE]
10644         link = nicparams[constants.NIC_LINK]
10645         args["nics"].append((ip, mac, mode, link))
10646       elif constants.DDM_REMOVE in nic_override:
10647         del args["nics"][-1]
10648
10649     env = _BuildInstanceHookEnvByObject(self, self.instance, override=args)
10650     if self.op.disk_template:
10651       env["NEW_DISK_TEMPLATE"] = self.op.disk_template
10652
10653     return env
10654
10655   def BuildHooksNodes(self):
10656     """Build hooks nodes.
10657
10658     """
10659     nl = [self.cfg.GetMasterNode()] + list(self.instance.all_nodes)
10660     return (nl, nl)
10661
10662   def CheckPrereq(self):
10663     """Check prerequisites.
10664
10665     This only checks the instance list against the existing names.
10666
10667     """
10668     # checking the new params on the primary/secondary nodes
10669
10670     instance = self.instance = self.cfg.GetInstanceInfo(self.op.instance_name)
10671     cluster = self.cluster = self.cfg.GetClusterInfo()
10672     assert self.instance is not None, \
10673       "Cannot retrieve locked instance %s" % self.op.instance_name
10674     pnode = instance.primary_node
10675     nodelist = list(instance.all_nodes)
10676
10677     # OS change
10678     if self.op.os_name and not self.op.force:
10679       _CheckNodeHasOS(self, instance.primary_node, self.op.os_name,
10680                       self.op.force_variant)
10681       instance_os = self.op.os_name
10682     else:
10683       instance_os = instance.os
10684
10685     if self.op.disk_template:
10686       if instance.disk_template == self.op.disk_template:
10687         raise errors.OpPrereqError("Instance already has disk template %s" %
10688                                    instance.disk_template, errors.ECODE_INVAL)
10689
10690       if (instance.disk_template,
10691           self.op.disk_template) not in self._DISK_CONVERSIONS:
10692         raise errors.OpPrereqError("Unsupported disk template conversion from"
10693                                    " %s to %s" % (instance.disk_template,
10694                                                   self.op.disk_template),
10695                                    errors.ECODE_INVAL)
10696       _CheckInstanceDown(self, instance, "cannot change disk template")
10697       if self.op.disk_template in constants.DTS_INT_MIRROR:
10698         if self.op.remote_node == pnode:
10699           raise errors.OpPrereqError("Given new secondary node %s is the same"
10700                                      " as the primary node of the instance" %
10701                                      self.op.remote_node, errors.ECODE_STATE)
10702         _CheckNodeOnline(self, self.op.remote_node)
10703         _CheckNodeNotDrained(self, self.op.remote_node)
10704         # FIXME: here we assume that the old instance type is DT_PLAIN
10705         assert instance.disk_template == constants.DT_PLAIN
10706         disks = [{constants.IDISK_SIZE: d.size,
10707                   constants.IDISK_VG: d.logical_id[0]}
10708                  for d in instance.disks]
10709         required = _ComputeDiskSizePerVG(self.op.disk_template, disks)
10710         _CheckNodesFreeDiskPerVG(self, [self.op.remote_node], required)
10711
10712     # hvparams processing
10713     if self.op.hvparams:
10714       hv_type = instance.hypervisor
10715       i_hvdict = _GetUpdatedParams(instance.hvparams, self.op.hvparams)
10716       utils.ForceDictType(i_hvdict, constants.HVS_PARAMETER_TYPES)
10717       hv_new = cluster.SimpleFillHV(hv_type, instance.os, i_hvdict)
10718
10719       # local check
10720       hypervisor.GetHypervisor(hv_type).CheckParameterSyntax(hv_new)
10721       _CheckHVParams(self, nodelist, instance.hypervisor, hv_new)
10722       self.hv_new = hv_new # the new actual values
10723       self.hv_inst = i_hvdict # the new dict (without defaults)
10724     else:
10725       self.hv_new = self.hv_inst = {}
10726
10727     # beparams processing
10728     if self.op.beparams:
10729       i_bedict = _GetUpdatedParams(instance.beparams, self.op.beparams,
10730                                    use_none=True)
10731       utils.ForceDictType(i_bedict, constants.BES_PARAMETER_TYPES)
10732       be_new = cluster.SimpleFillBE(i_bedict)
10733       self.be_new = be_new # the new actual values
10734       self.be_inst = i_bedict # the new dict (without defaults)
10735     else:
10736       self.be_new = self.be_inst = {}
10737     be_old = cluster.FillBE(instance)
10738
10739     # osparams processing
10740     if self.op.osparams:
10741       i_osdict = _GetUpdatedParams(instance.osparams, self.op.osparams)
10742       _CheckOSParams(self, True, nodelist, instance_os, i_osdict)
10743       self.os_inst = i_osdict # the new dict (without defaults)
10744     else:
10745       self.os_inst = {}
10746
10747     self.warn = []
10748
10749     if (constants.BE_MEMORY in self.op.beparams and not self.op.force and
10750         be_new[constants.BE_MEMORY] > be_old[constants.BE_MEMORY]):
10751       mem_check_list = [pnode]
10752       if be_new[constants.BE_AUTO_BALANCE]:
10753         # either we changed auto_balance to yes or it was from before
10754         mem_check_list.extend(instance.secondary_nodes)
10755       instance_info = self.rpc.call_instance_info(pnode, instance.name,
10756                                                   instance.hypervisor)
10757       nodeinfo = self.rpc.call_node_info(mem_check_list, None,
10758                                          instance.hypervisor)
10759       pninfo = nodeinfo[pnode]
10760       msg = pninfo.fail_msg
10761       if msg:
10762         # Assume the primary node is unreachable and go ahead
10763         self.warn.append("Can't get info from primary node %s: %s" %
10764                          (pnode,  msg))
10765       elif not isinstance(pninfo.payload.get("memory_free", None), int):
10766         self.warn.append("Node data from primary node %s doesn't contain"
10767                          " free memory information" % pnode)
10768       elif instance_info.fail_msg:
10769         self.warn.append("Can't get instance runtime information: %s" %
10770                         instance_info.fail_msg)
10771       else:
10772         if instance_info.payload:
10773           current_mem = int(instance_info.payload["memory"])
10774         else:
10775           # Assume instance not running
10776           # (there is a slight race condition here, but it's not very probable,
10777           # and we have no other way to check)
10778           current_mem = 0
10779         miss_mem = (be_new[constants.BE_MEMORY] - current_mem -
10780                     pninfo.payload["memory_free"])
10781         if miss_mem > 0:
10782           raise errors.OpPrereqError("This change will prevent the instance"
10783                                      " from starting, due to %d MB of memory"
10784                                      " missing on its primary node" % miss_mem,
10785                                      errors.ECODE_NORES)
10786
10787       if be_new[constants.BE_AUTO_BALANCE]:
10788         for node, nres in nodeinfo.items():
10789           if node not in instance.secondary_nodes:
10790             continue
10791           nres.Raise("Can't get info from secondary node %s" % node,
10792                      prereq=True, ecode=errors.ECODE_STATE)
10793           if not isinstance(nres.payload.get("memory_free", None), int):
10794             raise errors.OpPrereqError("Secondary node %s didn't return free"
10795                                        " memory information" % node,
10796                                        errors.ECODE_STATE)
10797           elif be_new[constants.BE_MEMORY] > nres.payload["memory_free"]:
10798             raise errors.OpPrereqError("This change will prevent the instance"
10799                                        " from failover to its secondary node"
10800                                        " %s, due to not enough memory" % node,
10801                                        errors.ECODE_STATE)
10802
10803     # NIC processing
10804     self.nic_pnew = {}
10805     self.nic_pinst = {}
10806     for nic_op, nic_dict in self.op.nics:
10807       if nic_op == constants.DDM_REMOVE:
10808         if not instance.nics:
10809           raise errors.OpPrereqError("Instance has no NICs, cannot remove",
10810                                      errors.ECODE_INVAL)
10811         continue
10812       if nic_op != constants.DDM_ADD:
10813         # an existing nic
10814         if not instance.nics:
10815           raise errors.OpPrereqError("Invalid NIC index %s, instance has"
10816                                      " no NICs" % nic_op,
10817                                      errors.ECODE_INVAL)
10818         if nic_op < 0 or nic_op >= len(instance.nics):
10819           raise errors.OpPrereqError("Invalid NIC index %s, valid values"
10820                                      " are 0 to %d" %
10821                                      (nic_op, len(instance.nics) - 1),
10822                                      errors.ECODE_INVAL)
10823         old_nic_params = instance.nics[nic_op].nicparams
10824         old_nic_ip = instance.nics[nic_op].ip
10825       else:
10826         old_nic_params = {}
10827         old_nic_ip = None
10828
10829       update_params_dict = dict([(key, nic_dict[key])
10830                                  for key in constants.NICS_PARAMETERS
10831                                  if key in nic_dict])
10832
10833       if "bridge" in nic_dict:
10834         update_params_dict[constants.NIC_LINK] = nic_dict["bridge"]
10835
10836       new_nic_params = _GetUpdatedParams(old_nic_params,
10837                                          update_params_dict)
10838       utils.ForceDictType(new_nic_params, constants.NICS_PARAMETER_TYPES)
10839       new_filled_nic_params = cluster.SimpleFillNIC(new_nic_params)
10840       objects.NIC.CheckParameterSyntax(new_filled_nic_params)
10841       self.nic_pinst[nic_op] = new_nic_params
10842       self.nic_pnew[nic_op] = new_filled_nic_params
10843       new_nic_mode = new_filled_nic_params[constants.NIC_MODE]
10844
10845       if new_nic_mode == constants.NIC_MODE_BRIDGED:
10846         nic_bridge = new_filled_nic_params[constants.NIC_LINK]
10847         msg = self.rpc.call_bridges_exist(pnode, [nic_bridge]).fail_msg
10848         if msg:
10849           msg = "Error checking bridges on node %s: %s" % (pnode, msg)
10850           if self.op.force:
10851             self.warn.append(msg)
10852           else:
10853             raise errors.OpPrereqError(msg, errors.ECODE_ENVIRON)
10854       if new_nic_mode == constants.NIC_MODE_ROUTED:
10855         if constants.INIC_IP in nic_dict:
10856           nic_ip = nic_dict[constants.INIC_IP]
10857         else:
10858           nic_ip = old_nic_ip
10859         if nic_ip is None:
10860           raise errors.OpPrereqError("Cannot set the nic ip to None"
10861                                      " on a routed nic", errors.ECODE_INVAL)
10862       if constants.INIC_MAC in nic_dict:
10863         nic_mac = nic_dict[constants.INIC_MAC]
10864         if nic_mac is None:
10865           raise errors.OpPrereqError("Cannot set the nic mac to None",
10866                                      errors.ECODE_INVAL)
10867         elif nic_mac in (constants.VALUE_AUTO, constants.VALUE_GENERATE):
10868           # otherwise generate the mac
10869           nic_dict[constants.INIC_MAC] = \
10870             self.cfg.GenerateMAC(self.proc.GetECId())
10871         else:
10872           # or validate/reserve the current one
10873           try:
10874             self.cfg.ReserveMAC(nic_mac, self.proc.GetECId())
10875           except errors.ReservationError:
10876             raise errors.OpPrereqError("MAC address %s already in use"
10877                                        " in cluster" % nic_mac,
10878                                        errors.ECODE_NOTUNIQUE)
10879
10880     # DISK processing
10881     if self.op.disks and instance.disk_template == constants.DT_DISKLESS:
10882       raise errors.OpPrereqError("Disk operations not supported for"
10883                                  " diskless instances",
10884                                  errors.ECODE_INVAL)
10885     for disk_op, _ in self.op.disks:
10886       if disk_op == constants.DDM_REMOVE:
10887         if len(instance.disks) == 1:
10888           raise errors.OpPrereqError("Cannot remove the last disk of"
10889                                      " an instance", errors.ECODE_INVAL)
10890         _CheckInstanceDown(self, instance, "cannot remove disks")
10891
10892       if (disk_op == constants.DDM_ADD and
10893           len(instance.disks) >= constants.MAX_DISKS):
10894         raise errors.OpPrereqError("Instance has too many disks (%d), cannot"
10895                                    " add more" % constants.MAX_DISKS,
10896                                    errors.ECODE_STATE)
10897       if disk_op not in (constants.DDM_ADD, constants.DDM_REMOVE):
10898         # an existing disk
10899         if disk_op < 0 or disk_op >= len(instance.disks):
10900           raise errors.OpPrereqError("Invalid disk index %s, valid values"
10901                                      " are 0 to %d" %
10902                                      (disk_op, len(instance.disks)),
10903                                      errors.ECODE_INVAL)
10904
10905     return
10906
10907   def _ConvertPlainToDrbd(self, feedback_fn):
10908     """Converts an instance from plain to drbd.
10909
10910     """
10911     feedback_fn("Converting template to drbd")
10912     instance = self.instance
10913     pnode = instance.primary_node
10914     snode = self.op.remote_node
10915
10916     # create a fake disk info for _GenerateDiskTemplate
10917     disk_info = [{constants.IDISK_SIZE: d.size, constants.IDISK_MODE: d.mode,
10918                   constants.IDISK_VG: d.logical_id[0]}
10919                  for d in instance.disks]
10920     new_disks = _GenerateDiskTemplate(self, self.op.disk_template,
10921                                       instance.name, pnode, [snode],
10922                                       disk_info, None, None, 0, feedback_fn)
10923     info = _GetInstanceInfoText(instance)
10924     feedback_fn("Creating aditional volumes...")
10925     # first, create the missing data and meta devices
10926     for disk in new_disks:
10927       # unfortunately this is... not too nice
10928       _CreateSingleBlockDev(self, pnode, instance, disk.children[1],
10929                             info, True)
10930       for child in disk.children:
10931         _CreateSingleBlockDev(self, snode, instance, child, info, True)
10932     # at this stage, all new LVs have been created, we can rename the
10933     # old ones
10934     feedback_fn("Renaming original volumes...")
10935     rename_list = [(o, n.children[0].logical_id)
10936                    for (o, n) in zip(instance.disks, new_disks)]
10937     result = self.rpc.call_blockdev_rename(pnode, rename_list)
10938     result.Raise("Failed to rename original LVs")
10939
10940     feedback_fn("Initializing DRBD devices...")
10941     # all child devices are in place, we can now create the DRBD devices
10942     for disk in new_disks:
10943       for node in [pnode, snode]:
10944         f_create = node == pnode
10945         _CreateSingleBlockDev(self, node, instance, disk, info, f_create)
10946
10947     # at this point, the instance has been modified
10948     instance.disk_template = constants.DT_DRBD8
10949     instance.disks = new_disks
10950     self.cfg.Update(instance, feedback_fn)
10951
10952     # disks are created, waiting for sync
10953     disk_abort = not _WaitForSync(self, instance,
10954                                   oneshot=not self.op.wait_for_sync)
10955     if disk_abort:
10956       raise errors.OpExecError("There are some degraded disks for"
10957                                " this instance, please cleanup manually")
10958
10959   def _ConvertDrbdToPlain(self, feedback_fn):
10960     """Converts an instance from drbd to plain.
10961
10962     """
10963     instance = self.instance
10964     assert len(instance.secondary_nodes) == 1
10965     pnode = instance.primary_node
10966     snode = instance.secondary_nodes[0]
10967     feedback_fn("Converting template to plain")
10968
10969     old_disks = instance.disks
10970     new_disks = [d.children[0] for d in old_disks]
10971
10972     # copy over size and mode
10973     for parent, child in zip(old_disks, new_disks):
10974       child.size = parent.size
10975       child.mode = parent.mode
10976
10977     # update instance structure
10978     instance.disks = new_disks
10979     instance.disk_template = constants.DT_PLAIN
10980     self.cfg.Update(instance, feedback_fn)
10981
10982     feedback_fn("Removing volumes on the secondary node...")
10983     for disk in old_disks:
10984       self.cfg.SetDiskID(disk, snode)
10985       msg = self.rpc.call_blockdev_remove(snode, disk).fail_msg
10986       if msg:
10987         self.LogWarning("Could not remove block device %s on node %s,"
10988                         " continuing anyway: %s", disk.iv_name, snode, msg)
10989
10990     feedback_fn("Removing unneeded volumes on the primary node...")
10991     for idx, disk in enumerate(old_disks):
10992       meta = disk.children[1]
10993       self.cfg.SetDiskID(meta, pnode)
10994       msg = self.rpc.call_blockdev_remove(pnode, meta).fail_msg
10995       if msg:
10996         self.LogWarning("Could not remove metadata for disk %d on node %s,"
10997                         " continuing anyway: %s", idx, pnode, msg)
10998
10999   def Exec(self, feedback_fn):
11000     """Modifies an instance.
11001
11002     All parameters take effect only at the next restart of the instance.
11003
11004     """
11005     # Process here the warnings from CheckPrereq, as we don't have a
11006     # feedback_fn there.
11007     for warn in self.warn:
11008       feedback_fn("WARNING: %s" % warn)
11009
11010     result = []
11011     instance = self.instance
11012     # disk changes
11013     for disk_op, disk_dict in self.op.disks:
11014       if disk_op == constants.DDM_REMOVE:
11015         # remove the last disk
11016         device = instance.disks.pop()
11017         device_idx = len(instance.disks)
11018         for node, disk in device.ComputeNodeTree(instance.primary_node):
11019           self.cfg.SetDiskID(disk, node)
11020           msg = self.rpc.call_blockdev_remove(node, disk).fail_msg
11021           if msg:
11022             self.LogWarning("Could not remove disk/%d on node %s: %s,"
11023                             " continuing anyway", device_idx, node, msg)
11024         result.append(("disk/%d" % device_idx, "remove"))
11025       elif disk_op == constants.DDM_ADD:
11026         # add a new disk
11027         if instance.disk_template in (constants.DT_FILE,
11028                                         constants.DT_SHARED_FILE):
11029           file_driver, file_path = instance.disks[0].logical_id
11030           file_path = os.path.dirname(file_path)
11031         else:
11032           file_driver = file_path = None
11033         disk_idx_base = len(instance.disks)
11034         new_disk = _GenerateDiskTemplate(self,
11035                                          instance.disk_template,
11036                                          instance.name, instance.primary_node,
11037                                          instance.secondary_nodes,
11038                                          [disk_dict],
11039                                          file_path,
11040                                          file_driver,
11041                                          disk_idx_base, feedback_fn)[0]
11042         instance.disks.append(new_disk)
11043         info = _GetInstanceInfoText(instance)
11044
11045         logging.info("Creating volume %s for instance %s",
11046                      new_disk.iv_name, instance.name)
11047         # Note: this needs to be kept in sync with _CreateDisks
11048         #HARDCODE
11049         for node in instance.all_nodes:
11050           f_create = node == instance.primary_node
11051           try:
11052             _CreateBlockDev(self, node, instance, new_disk,
11053                             f_create, info, f_create)
11054           except errors.OpExecError, err:
11055             self.LogWarning("Failed to create volume %s (%s) on"
11056                             " node %s: %s",
11057                             new_disk.iv_name, new_disk, node, err)
11058         result.append(("disk/%d" % disk_idx_base, "add:size=%s,mode=%s" %
11059                        (new_disk.size, new_disk.mode)))
11060       else:
11061         # change a given disk
11062         instance.disks[disk_op].mode = disk_dict[constants.IDISK_MODE]
11063         result.append(("disk.mode/%d" % disk_op,
11064                        disk_dict[constants.IDISK_MODE]))
11065
11066     if self.op.disk_template:
11067       r_shut = _ShutdownInstanceDisks(self, instance)
11068       if not r_shut:
11069         raise errors.OpExecError("Cannot shutdown instance disks, unable to"
11070                                  " proceed with disk template conversion")
11071       mode = (instance.disk_template, self.op.disk_template)
11072       try:
11073         self._DISK_CONVERSIONS[mode](self, feedback_fn)
11074       except:
11075         self.cfg.ReleaseDRBDMinors(instance.name)
11076         raise
11077       result.append(("disk_template", self.op.disk_template))
11078
11079     # NIC changes
11080     for nic_op, nic_dict in self.op.nics:
11081       if nic_op == constants.DDM_REMOVE:
11082         # remove the last nic
11083         del instance.nics[-1]
11084         result.append(("nic.%d" % len(instance.nics), "remove"))
11085       elif nic_op == constants.DDM_ADD:
11086         # mac and bridge should be set, by now
11087         mac = nic_dict[constants.INIC_MAC]
11088         ip = nic_dict.get(constants.INIC_IP, None)
11089         nicparams = self.nic_pinst[constants.DDM_ADD]
11090         new_nic = objects.NIC(mac=mac, ip=ip, nicparams=nicparams)
11091         instance.nics.append(new_nic)
11092         result.append(("nic.%d" % (len(instance.nics) - 1),
11093                        "add:mac=%s,ip=%s,mode=%s,link=%s" %
11094                        (new_nic.mac, new_nic.ip,
11095                         self.nic_pnew[constants.DDM_ADD][constants.NIC_MODE],
11096                         self.nic_pnew[constants.DDM_ADD][constants.NIC_LINK]
11097                        )))
11098       else:
11099         for key in (constants.INIC_MAC, constants.INIC_IP):
11100           if key in nic_dict:
11101             setattr(instance.nics[nic_op], key, nic_dict[key])
11102         if nic_op in self.nic_pinst:
11103           instance.nics[nic_op].nicparams = self.nic_pinst[nic_op]
11104         for key, val in nic_dict.iteritems():
11105           result.append(("nic.%s/%d" % (key, nic_op), val))
11106
11107     # hvparams changes
11108     if self.op.hvparams:
11109       instance.hvparams = self.hv_inst
11110       for key, val in self.op.hvparams.iteritems():
11111         result.append(("hv/%s" % key, val))
11112
11113     # beparams changes
11114     if self.op.beparams:
11115       instance.beparams = self.be_inst
11116       for key, val in self.op.beparams.iteritems():
11117         result.append(("be/%s" % key, val))
11118
11119     # OS change
11120     if self.op.os_name:
11121       instance.os = self.op.os_name
11122
11123     # osparams changes
11124     if self.op.osparams:
11125       instance.osparams = self.os_inst
11126       for key, val in self.op.osparams.iteritems():
11127         result.append(("os/%s" % key, val))
11128
11129     self.cfg.Update(instance, feedback_fn)
11130
11131     return result
11132
11133   _DISK_CONVERSIONS = {
11134     (constants.DT_PLAIN, constants.DT_DRBD8): _ConvertPlainToDrbd,
11135     (constants.DT_DRBD8, constants.DT_PLAIN): _ConvertDrbdToPlain,
11136     }
11137
11138
11139 class LUBackupQuery(NoHooksLU):
11140   """Query the exports list
11141
11142   """
11143   REQ_BGL = False
11144
11145   def ExpandNames(self):
11146     self.needed_locks = {}
11147     self.share_locks[locking.LEVEL_NODE] = 1
11148     if not self.op.nodes:
11149       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11150     else:
11151       self.needed_locks[locking.LEVEL_NODE] = \
11152         _GetWantedNodes(self, self.op.nodes)
11153
11154   def Exec(self, feedback_fn):
11155     """Compute the list of all the exported system images.
11156
11157     @rtype: dict
11158     @return: a dictionary with the structure node->(export-list)
11159         where export-list is a list of the instances exported on
11160         that node.
11161
11162     """
11163     self.nodes = self.glm.list_owned(locking.LEVEL_NODE)
11164     rpcresult = self.rpc.call_export_list(self.nodes)
11165     result = {}
11166     for node in rpcresult:
11167       if rpcresult[node].fail_msg:
11168         result[node] = False
11169       else:
11170         result[node] = rpcresult[node].payload
11171
11172     return result
11173
11174
11175 class LUBackupPrepare(NoHooksLU):
11176   """Prepares an instance for an export and returns useful information.
11177
11178   """
11179   REQ_BGL = False
11180
11181   def ExpandNames(self):
11182     self._ExpandAndLockInstance()
11183
11184   def CheckPrereq(self):
11185     """Check prerequisites.
11186
11187     """
11188     instance_name = self.op.instance_name
11189
11190     self.instance = self.cfg.GetInstanceInfo(instance_name)
11191     assert self.instance is not None, \
11192           "Cannot retrieve locked instance %s" % self.op.instance_name
11193     _CheckNodeOnline(self, self.instance.primary_node)
11194
11195     self._cds = _GetClusterDomainSecret()
11196
11197   def Exec(self, feedback_fn):
11198     """Prepares an instance for an export.
11199
11200     """
11201     instance = self.instance
11202
11203     if self.op.mode == constants.EXPORT_MODE_REMOTE:
11204       salt = utils.GenerateSecret(8)
11205
11206       feedback_fn("Generating X509 certificate on %s" % instance.primary_node)
11207       result = self.rpc.call_x509_cert_create(instance.primary_node,
11208                                               constants.RIE_CERT_VALIDITY)
11209       result.Raise("Can't create X509 key and certificate on %s" % result.node)
11210
11211       (name, cert_pem) = result.payload
11212
11213       cert = OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM,
11214                                              cert_pem)
11215
11216       return {
11217         "handshake": masterd.instance.ComputeRemoteExportHandshake(self._cds),
11218         "x509_key_name": (name, utils.Sha1Hmac(self._cds, name, salt=salt),
11219                           salt),
11220         "x509_ca": utils.SignX509Certificate(cert, self._cds, salt),
11221         }
11222
11223     return None
11224
11225
11226 class LUBackupExport(LogicalUnit):
11227   """Export an instance to an image in the cluster.
11228
11229   """
11230   HPATH = "instance-export"
11231   HTYPE = constants.HTYPE_INSTANCE
11232   REQ_BGL = False
11233
11234   def CheckArguments(self):
11235     """Check the arguments.
11236
11237     """
11238     self.x509_key_name = self.op.x509_key_name
11239     self.dest_x509_ca_pem = self.op.destination_x509_ca
11240
11241     if self.op.mode == constants.EXPORT_MODE_REMOTE:
11242       if not self.x509_key_name:
11243         raise errors.OpPrereqError("Missing X509 key name for encryption",
11244                                    errors.ECODE_INVAL)
11245
11246       if not self.dest_x509_ca_pem:
11247         raise errors.OpPrereqError("Missing destination X509 CA",
11248                                    errors.ECODE_INVAL)
11249
11250   def ExpandNames(self):
11251     self._ExpandAndLockInstance()
11252
11253     # Lock all nodes for local exports
11254     if self.op.mode == constants.EXPORT_MODE_LOCAL:
11255       # FIXME: lock only instance primary and destination node
11256       #
11257       # Sad but true, for now we have do lock all nodes, as we don't know where
11258       # the previous export might be, and in this LU we search for it and
11259       # remove it from its current node. In the future we could fix this by:
11260       #  - making a tasklet to search (share-lock all), then create the
11261       #    new one, then one to remove, after
11262       #  - removing the removal operation altogether
11263       self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11264
11265   def DeclareLocks(self, level):
11266     """Last minute lock declaration."""
11267     # All nodes are locked anyway, so nothing to do here.
11268
11269   def BuildHooksEnv(self):
11270     """Build hooks env.
11271
11272     This will run on the master, primary node and target node.
11273
11274     """
11275     env = {
11276       "EXPORT_MODE": self.op.mode,
11277       "EXPORT_NODE": self.op.target_node,
11278       "EXPORT_DO_SHUTDOWN": self.op.shutdown,
11279       "SHUTDOWN_TIMEOUT": self.op.shutdown_timeout,
11280       # TODO: Generic function for boolean env variables
11281       "REMOVE_INSTANCE": str(bool(self.op.remove_instance)),
11282       }
11283
11284     env.update(_BuildInstanceHookEnvByObject(self, self.instance))
11285
11286     return env
11287
11288   def BuildHooksNodes(self):
11289     """Build hooks nodes.
11290
11291     """
11292     nl = [self.cfg.GetMasterNode(), self.instance.primary_node]
11293
11294     if self.op.mode == constants.EXPORT_MODE_LOCAL:
11295       nl.append(self.op.target_node)
11296
11297     return (nl, nl)
11298
11299   def CheckPrereq(self):
11300     """Check prerequisites.
11301
11302     This checks that the instance and node names are valid.
11303
11304     """
11305     instance_name = self.op.instance_name
11306
11307     self.instance = self.cfg.GetInstanceInfo(instance_name)
11308     assert self.instance is not None, \
11309           "Cannot retrieve locked instance %s" % self.op.instance_name
11310     _CheckNodeOnline(self, self.instance.primary_node)
11311
11312     if (self.op.remove_instance and self.instance.admin_up and
11313         not self.op.shutdown):
11314       raise errors.OpPrereqError("Can not remove instance without shutting it"
11315                                  " down before")
11316
11317     if self.op.mode == constants.EXPORT_MODE_LOCAL:
11318       self.op.target_node = _ExpandNodeName(self.cfg, self.op.target_node)
11319       self.dst_node = self.cfg.GetNodeInfo(self.op.target_node)
11320       assert self.dst_node is not None
11321
11322       _CheckNodeOnline(self, self.dst_node.name)
11323       _CheckNodeNotDrained(self, self.dst_node.name)
11324
11325       self._cds = None
11326       self.dest_disk_info = None
11327       self.dest_x509_ca = None
11328
11329     elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11330       self.dst_node = None
11331
11332       if len(self.op.target_node) != len(self.instance.disks):
11333         raise errors.OpPrereqError(("Received destination information for %s"
11334                                     " disks, but instance %s has %s disks") %
11335                                    (len(self.op.target_node), instance_name,
11336                                     len(self.instance.disks)),
11337                                    errors.ECODE_INVAL)
11338
11339       cds = _GetClusterDomainSecret()
11340
11341       # Check X509 key name
11342       try:
11343         (key_name, hmac_digest, hmac_salt) = self.x509_key_name
11344       except (TypeError, ValueError), err:
11345         raise errors.OpPrereqError("Invalid data for X509 key name: %s" % err)
11346
11347       if not utils.VerifySha1Hmac(cds, key_name, hmac_digest, salt=hmac_salt):
11348         raise errors.OpPrereqError("HMAC for X509 key name is wrong",
11349                                    errors.ECODE_INVAL)
11350
11351       # Load and verify CA
11352       try:
11353         (cert, _) = utils.LoadSignedX509Certificate(self.dest_x509_ca_pem, cds)
11354       except OpenSSL.crypto.Error, err:
11355         raise errors.OpPrereqError("Unable to load destination X509 CA (%s)" %
11356                                    (err, ), errors.ECODE_INVAL)
11357
11358       (errcode, msg) = utils.VerifyX509Certificate(cert, None, None)
11359       if errcode is not None:
11360         raise errors.OpPrereqError("Invalid destination X509 CA (%s)" %
11361                                    (msg, ), errors.ECODE_INVAL)
11362
11363       self.dest_x509_ca = cert
11364
11365       # Verify target information
11366       disk_info = []
11367       for idx, disk_data in enumerate(self.op.target_node):
11368         try:
11369           (host, port, magic) = \
11370             masterd.instance.CheckRemoteExportDiskInfo(cds, idx, disk_data)
11371         except errors.GenericError, err:
11372           raise errors.OpPrereqError("Target info for disk %s: %s" %
11373                                      (idx, err), errors.ECODE_INVAL)
11374
11375         disk_info.append((host, port, magic))
11376
11377       assert len(disk_info) == len(self.op.target_node)
11378       self.dest_disk_info = disk_info
11379
11380     else:
11381       raise errors.ProgrammerError("Unhandled export mode %r" %
11382                                    self.op.mode)
11383
11384     # instance disk type verification
11385     # TODO: Implement export support for file-based disks
11386     for disk in self.instance.disks:
11387       if disk.dev_type == constants.LD_FILE:
11388         raise errors.OpPrereqError("Export not supported for instances with"
11389                                    " file-based disks", errors.ECODE_INVAL)
11390
11391   def _CleanupExports(self, feedback_fn):
11392     """Removes exports of current instance from all other nodes.
11393
11394     If an instance in a cluster with nodes A..D was exported to node C, its
11395     exports will be removed from the nodes A, B and D.
11396
11397     """
11398     assert self.op.mode != constants.EXPORT_MODE_REMOTE
11399
11400     nodelist = self.cfg.GetNodeList()
11401     nodelist.remove(self.dst_node.name)
11402
11403     # on one-node clusters nodelist will be empty after the removal
11404     # if we proceed the backup would be removed because OpBackupQuery
11405     # substitutes an empty list with the full cluster node list.
11406     iname = self.instance.name
11407     if nodelist:
11408       feedback_fn("Removing old exports for instance %s" % iname)
11409       exportlist = self.rpc.call_export_list(nodelist)
11410       for node in exportlist:
11411         if exportlist[node].fail_msg:
11412           continue
11413         if iname in exportlist[node].payload:
11414           msg = self.rpc.call_export_remove(node, iname).fail_msg
11415           if msg:
11416             self.LogWarning("Could not remove older export for instance %s"
11417                             " on node %s: %s", iname, node, msg)
11418
11419   def Exec(self, feedback_fn):
11420     """Export an instance to an image in the cluster.
11421
11422     """
11423     assert self.op.mode in constants.EXPORT_MODES
11424
11425     instance = self.instance
11426     src_node = instance.primary_node
11427
11428     if self.op.shutdown:
11429       # shutdown the instance, but not the disks
11430       feedback_fn("Shutting down instance %s" % instance.name)
11431       result = self.rpc.call_instance_shutdown(src_node, instance,
11432                                                self.op.shutdown_timeout)
11433       # TODO: Maybe ignore failures if ignore_remove_failures is set
11434       result.Raise("Could not shutdown instance %s on"
11435                    " node %s" % (instance.name, src_node))
11436
11437     # set the disks ID correctly since call_instance_start needs the
11438     # correct drbd minor to create the symlinks
11439     for disk in instance.disks:
11440       self.cfg.SetDiskID(disk, src_node)
11441
11442     activate_disks = (not instance.admin_up)
11443
11444     if activate_disks:
11445       # Activate the instance disks if we'exporting a stopped instance
11446       feedback_fn("Activating disks for %s" % instance.name)
11447       _StartInstanceDisks(self, instance, None)
11448
11449     try:
11450       helper = masterd.instance.ExportInstanceHelper(self, feedback_fn,
11451                                                      instance)
11452
11453       helper.CreateSnapshots()
11454       try:
11455         if (self.op.shutdown and instance.admin_up and
11456             not self.op.remove_instance):
11457           assert not activate_disks
11458           feedback_fn("Starting instance %s" % instance.name)
11459           result = self.rpc.call_instance_start(src_node, instance,
11460                                                 None, None, False)
11461           msg = result.fail_msg
11462           if msg:
11463             feedback_fn("Failed to start instance: %s" % msg)
11464             _ShutdownInstanceDisks(self, instance)
11465             raise errors.OpExecError("Could not start instance: %s" % msg)
11466
11467         if self.op.mode == constants.EXPORT_MODE_LOCAL:
11468           (fin_resu, dresults) = helper.LocalExport(self.dst_node)
11469         elif self.op.mode == constants.EXPORT_MODE_REMOTE:
11470           connect_timeout = constants.RIE_CONNECT_TIMEOUT
11471           timeouts = masterd.instance.ImportExportTimeouts(connect_timeout)
11472
11473           (key_name, _, _) = self.x509_key_name
11474
11475           dest_ca_pem = \
11476             OpenSSL.crypto.dump_certificate(OpenSSL.crypto.FILETYPE_PEM,
11477                                             self.dest_x509_ca)
11478
11479           (fin_resu, dresults) = helper.RemoteExport(self.dest_disk_info,
11480                                                      key_name, dest_ca_pem,
11481                                                      timeouts)
11482       finally:
11483         helper.Cleanup()
11484
11485       # Check for backwards compatibility
11486       assert len(dresults) == len(instance.disks)
11487       assert compat.all(isinstance(i, bool) for i in dresults), \
11488              "Not all results are boolean: %r" % dresults
11489
11490     finally:
11491       if activate_disks:
11492         feedback_fn("Deactivating disks for %s" % instance.name)
11493         _ShutdownInstanceDisks(self, instance)
11494
11495     if not (compat.all(dresults) and fin_resu):
11496       failures = []
11497       if not fin_resu:
11498         failures.append("export finalization")
11499       if not compat.all(dresults):
11500         fdsk = utils.CommaJoin(idx for (idx, dsk) in enumerate(dresults)
11501                                if not dsk)
11502         failures.append("disk export: disk(s) %s" % fdsk)
11503
11504       raise errors.OpExecError("Export failed, errors in %s" %
11505                                utils.CommaJoin(failures))
11506
11507     # At this point, the export was successful, we can cleanup/finish
11508
11509     # Remove instance if requested
11510     if self.op.remove_instance:
11511       feedback_fn("Removing instance %s" % instance.name)
11512       _RemoveInstance(self, feedback_fn, instance,
11513                       self.op.ignore_remove_failures)
11514
11515     if self.op.mode == constants.EXPORT_MODE_LOCAL:
11516       self._CleanupExports(feedback_fn)
11517
11518     return fin_resu, dresults
11519
11520
11521 class LUBackupRemove(NoHooksLU):
11522   """Remove exports related to the named instance.
11523
11524   """
11525   REQ_BGL = False
11526
11527   def ExpandNames(self):
11528     self.needed_locks = {}
11529     # We need all nodes to be locked in order for RemoveExport to work, but we
11530     # don't need to lock the instance itself, as nothing will happen to it (and
11531     # we can remove exports also for a removed instance)
11532     self.needed_locks[locking.LEVEL_NODE] = locking.ALL_SET
11533
11534   def Exec(self, feedback_fn):
11535     """Remove any export.
11536
11537     """
11538     instance_name = self.cfg.ExpandInstanceName(self.op.instance_name)
11539     # If the instance was not found we'll try with the name that was passed in.
11540     # This will only work if it was an FQDN, though.
11541     fqdn_warn = False
11542     if not instance_name:
11543       fqdn_warn = True
11544       instance_name = self.op.instance_name
11545
11546     locked_nodes = self.glm.list_owned(locking.LEVEL_NODE)
11547     exportlist = self.rpc.call_export_list(locked_nodes)
11548     found = False
11549     for node in exportlist:
11550       msg = exportlist[node].fail_msg
11551       if msg:
11552         self.LogWarning("Failed to query node %s (continuing): %s", node, msg)
11553         continue
11554       if instance_name in exportlist[node].payload:
11555         found = True
11556         result = self.rpc.call_export_remove(node, instance_name)
11557         msg = result.fail_msg
11558         if msg:
11559           logging.error("Could not remove export for instance %s"
11560                         " on node %s: %s", instance_name, node, msg)
11561
11562     if fqdn_warn and not found:
11563       feedback_fn("Export not found. If trying to remove an export belonging"
11564                   " to a deleted instance please use its Fully Qualified"
11565                   " Domain Name.")
11566
11567
11568 class LUGroupAdd(LogicalUnit):
11569   """Logical unit for creating node groups.
11570
11571   """
11572   HPATH = "group-add"
11573   HTYPE = constants.HTYPE_GROUP
11574   REQ_BGL = False
11575
11576   def ExpandNames(self):
11577     # We need the new group's UUID here so that we can create and acquire the
11578     # corresponding lock. Later, in Exec(), we'll indicate to cfg.AddNodeGroup
11579     # that it should not check whether the UUID exists in the configuration.
11580     self.group_uuid = self.cfg.GenerateUniqueID(self.proc.GetECId())
11581     self.needed_locks = {}
11582     self.add_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
11583
11584   def CheckPrereq(self):
11585     """Check prerequisites.
11586
11587     This checks that the given group name is not an existing node group
11588     already.
11589
11590     """
11591     try:
11592       existing_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11593     except errors.OpPrereqError:
11594       pass
11595     else:
11596       raise errors.OpPrereqError("Desired group name '%s' already exists as a"
11597                                  " node group (UUID: %s)" %
11598                                  (self.op.group_name, existing_uuid),
11599                                  errors.ECODE_EXISTS)
11600
11601     if self.op.ndparams:
11602       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11603
11604   def BuildHooksEnv(self):
11605     """Build hooks env.
11606
11607     """
11608     return {
11609       "GROUP_NAME": self.op.group_name,
11610       }
11611
11612   def BuildHooksNodes(self):
11613     """Build hooks nodes.
11614
11615     """
11616     mn = self.cfg.GetMasterNode()
11617     return ([mn], [mn])
11618
11619   def Exec(self, feedback_fn):
11620     """Add the node group to the cluster.
11621
11622     """
11623     group_obj = objects.NodeGroup(name=self.op.group_name, members=[],
11624                                   uuid=self.group_uuid,
11625                                   alloc_policy=self.op.alloc_policy,
11626                                   ndparams=self.op.ndparams)
11627
11628     self.cfg.AddNodeGroup(group_obj, self.proc.GetECId(), check_uuid=False)
11629     del self.remove_locks[locking.LEVEL_NODEGROUP]
11630
11631
11632 class LUGroupAssignNodes(NoHooksLU):
11633   """Logical unit for assigning nodes to groups.
11634
11635   """
11636   REQ_BGL = False
11637
11638   def ExpandNames(self):
11639     # These raise errors.OpPrereqError on their own:
11640     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11641     self.op.nodes = _GetWantedNodes(self, self.op.nodes)
11642
11643     # We want to lock all the affected nodes and groups. We have readily
11644     # available the list of nodes, and the *destination* group. To gather the
11645     # list of "source" groups, we need to fetch node information later on.
11646     self.needed_locks = {
11647       locking.LEVEL_NODEGROUP: set([self.group_uuid]),
11648       locking.LEVEL_NODE: self.op.nodes,
11649       }
11650
11651   def DeclareLocks(self, level):
11652     if level == locking.LEVEL_NODEGROUP:
11653       assert len(self.needed_locks[locking.LEVEL_NODEGROUP]) == 1
11654
11655       # Try to get all affected nodes' groups without having the group or node
11656       # lock yet. Needs verification later in the code flow.
11657       groups = self.cfg.GetNodeGroupsFromNodes(self.op.nodes)
11658
11659       self.needed_locks[locking.LEVEL_NODEGROUP].update(groups)
11660
11661   def CheckPrereq(self):
11662     """Check prerequisites.
11663
11664     """
11665     assert self.needed_locks[locking.LEVEL_NODEGROUP]
11666     assert (frozenset(self.glm.list_owned(locking.LEVEL_NODE)) ==
11667             frozenset(self.op.nodes))
11668
11669     expected_locks = (set([self.group_uuid]) |
11670                       self.cfg.GetNodeGroupsFromNodes(self.op.nodes))
11671     actual_locks = self.glm.list_owned(locking.LEVEL_NODEGROUP)
11672     if actual_locks != expected_locks:
11673       raise errors.OpExecError("Nodes changed groups since locks were acquired,"
11674                                " current groups are '%s', used to be '%s'" %
11675                                (utils.CommaJoin(expected_locks),
11676                                 utils.CommaJoin(actual_locks)))
11677
11678     self.node_data = self.cfg.GetAllNodesInfo()
11679     self.group = self.cfg.GetNodeGroup(self.group_uuid)
11680     instance_data = self.cfg.GetAllInstancesInfo()
11681
11682     if self.group is None:
11683       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11684                                (self.op.group_name, self.group_uuid))
11685
11686     (new_splits, previous_splits) = \
11687       self.CheckAssignmentForSplitInstances([(node, self.group_uuid)
11688                                              for node in self.op.nodes],
11689                                             self.node_data, instance_data)
11690
11691     if new_splits:
11692       fmt_new_splits = utils.CommaJoin(utils.NiceSort(new_splits))
11693
11694       if not self.op.force:
11695         raise errors.OpExecError("The following instances get split by this"
11696                                  " change and --force was not given: %s" %
11697                                  fmt_new_splits)
11698       else:
11699         self.LogWarning("This operation will split the following instances: %s",
11700                         fmt_new_splits)
11701
11702         if previous_splits:
11703           self.LogWarning("In addition, these already-split instances continue"
11704                           " to be split across groups: %s",
11705                           utils.CommaJoin(utils.NiceSort(previous_splits)))
11706
11707   def Exec(self, feedback_fn):
11708     """Assign nodes to a new group.
11709
11710     """
11711     for node in self.op.nodes:
11712       self.node_data[node].group = self.group_uuid
11713
11714     # FIXME: Depends on side-effects of modifying the result of
11715     # C{cfg.GetAllNodesInfo}
11716
11717     self.cfg.Update(self.group, feedback_fn) # Saves all modified nodes.
11718
11719   @staticmethod
11720   def CheckAssignmentForSplitInstances(changes, node_data, instance_data):
11721     """Check for split instances after a node assignment.
11722
11723     This method considers a series of node assignments as an atomic operation,
11724     and returns information about split instances after applying the set of
11725     changes.
11726
11727     In particular, it returns information about newly split instances, and
11728     instances that were already split, and remain so after the change.
11729
11730     Only instances whose disk template is listed in constants.DTS_INT_MIRROR are
11731     considered.
11732
11733     @type changes: list of (node_name, new_group_uuid) pairs.
11734     @param changes: list of node assignments to consider.
11735     @param node_data: a dict with data for all nodes
11736     @param instance_data: a dict with all instances to consider
11737     @rtype: a two-tuple
11738     @return: a list of instances that were previously okay and result split as a
11739       consequence of this change, and a list of instances that were previously
11740       split and this change does not fix.
11741
11742     """
11743     changed_nodes = dict((node, group) for node, group in changes
11744                          if node_data[node].group != group)
11745
11746     all_split_instances = set()
11747     previously_split_instances = set()
11748
11749     def InstanceNodes(instance):
11750       return [instance.primary_node] + list(instance.secondary_nodes)
11751
11752     for inst in instance_data.values():
11753       if inst.disk_template not in constants.DTS_INT_MIRROR:
11754         continue
11755
11756       instance_nodes = InstanceNodes(inst)
11757
11758       if len(set(node_data[node].group for node in instance_nodes)) > 1:
11759         previously_split_instances.add(inst.name)
11760
11761       if len(set(changed_nodes.get(node, node_data[node].group)
11762                  for node in instance_nodes)) > 1:
11763         all_split_instances.add(inst.name)
11764
11765     return (list(all_split_instances - previously_split_instances),
11766             list(previously_split_instances & all_split_instances))
11767
11768
11769 class _GroupQuery(_QueryBase):
11770   FIELDS = query.GROUP_FIELDS
11771
11772   def ExpandNames(self, lu):
11773     lu.needed_locks = {}
11774
11775     self._all_groups = lu.cfg.GetAllNodeGroupsInfo()
11776     name_to_uuid = dict((g.name, g.uuid) for g in self._all_groups.values())
11777
11778     if not self.names:
11779       self.wanted = [name_to_uuid[name]
11780                      for name in utils.NiceSort(name_to_uuid.keys())]
11781     else:
11782       # Accept names to be either names or UUIDs.
11783       missing = []
11784       self.wanted = []
11785       all_uuid = frozenset(self._all_groups.keys())
11786
11787       for name in self.names:
11788         if name in all_uuid:
11789           self.wanted.append(name)
11790         elif name in name_to_uuid:
11791           self.wanted.append(name_to_uuid[name])
11792         else:
11793           missing.append(name)
11794
11795       if missing:
11796         raise errors.OpPrereqError("Some groups do not exist: %s" %
11797                                    utils.CommaJoin(missing),
11798                                    errors.ECODE_NOENT)
11799
11800   def DeclareLocks(self, lu, level):
11801     pass
11802
11803   def _GetQueryData(self, lu):
11804     """Computes the list of node groups and their attributes.
11805
11806     """
11807     do_nodes = query.GQ_NODE in self.requested_data
11808     do_instances = query.GQ_INST in self.requested_data
11809
11810     group_to_nodes = None
11811     group_to_instances = None
11812
11813     # For GQ_NODE, we need to map group->[nodes], and group->[instances] for
11814     # GQ_INST. The former is attainable with just GetAllNodesInfo(), but for the
11815     # latter GetAllInstancesInfo() is not enough, for we have to go through
11816     # instance->node. Hence, we will need to process nodes even if we only need
11817     # instance information.
11818     if do_nodes or do_instances:
11819       all_nodes = lu.cfg.GetAllNodesInfo()
11820       group_to_nodes = dict((uuid, []) for uuid in self.wanted)
11821       node_to_group = {}
11822
11823       for node in all_nodes.values():
11824         if node.group in group_to_nodes:
11825           group_to_nodes[node.group].append(node.name)
11826           node_to_group[node.name] = node.group
11827
11828       if do_instances:
11829         all_instances = lu.cfg.GetAllInstancesInfo()
11830         group_to_instances = dict((uuid, []) for uuid in self.wanted)
11831
11832         for instance in all_instances.values():
11833           node = instance.primary_node
11834           if node in node_to_group:
11835             group_to_instances[node_to_group[node]].append(instance.name)
11836
11837         if not do_nodes:
11838           # Do not pass on node information if it was not requested.
11839           group_to_nodes = None
11840
11841     return query.GroupQueryData([self._all_groups[uuid]
11842                                  for uuid in self.wanted],
11843                                 group_to_nodes, group_to_instances)
11844
11845
11846 class LUGroupQuery(NoHooksLU):
11847   """Logical unit for querying node groups.
11848
11849   """
11850   REQ_BGL = False
11851
11852   def CheckArguments(self):
11853     self.gq = _GroupQuery(qlang.MakeSimpleFilter("name", self.op.names),
11854                           self.op.output_fields, False)
11855
11856   def ExpandNames(self):
11857     self.gq.ExpandNames(self)
11858
11859   def Exec(self, feedback_fn):
11860     return self.gq.OldStyleQuery(self)
11861
11862
11863 class LUGroupSetParams(LogicalUnit):
11864   """Modifies the parameters of a node group.
11865
11866   """
11867   HPATH = "group-modify"
11868   HTYPE = constants.HTYPE_GROUP
11869   REQ_BGL = False
11870
11871   def CheckArguments(self):
11872     all_changes = [
11873       self.op.ndparams,
11874       self.op.alloc_policy,
11875       ]
11876
11877     if all_changes.count(None) == len(all_changes):
11878       raise errors.OpPrereqError("Please pass at least one modification",
11879                                  errors.ECODE_INVAL)
11880
11881   def ExpandNames(self):
11882     # This raises errors.OpPrereqError on its own:
11883     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11884
11885     self.needed_locks = {
11886       locking.LEVEL_NODEGROUP: [self.group_uuid],
11887       }
11888
11889   def CheckPrereq(self):
11890     """Check prerequisites.
11891
11892     """
11893     self.group = self.cfg.GetNodeGroup(self.group_uuid)
11894
11895     if self.group is None:
11896       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
11897                                (self.op.group_name, self.group_uuid))
11898
11899     if self.op.ndparams:
11900       new_ndparams = _GetUpdatedParams(self.group.ndparams, self.op.ndparams)
11901       utils.ForceDictType(self.op.ndparams, constants.NDS_PARAMETER_TYPES)
11902       self.new_ndparams = new_ndparams
11903
11904   def BuildHooksEnv(self):
11905     """Build hooks env.
11906
11907     """
11908     return {
11909       "GROUP_NAME": self.op.group_name,
11910       "NEW_ALLOC_POLICY": self.op.alloc_policy,
11911       }
11912
11913   def BuildHooksNodes(self):
11914     """Build hooks nodes.
11915
11916     """
11917     mn = self.cfg.GetMasterNode()
11918     return ([mn], [mn])
11919
11920   def Exec(self, feedback_fn):
11921     """Modifies the node group.
11922
11923     """
11924     result = []
11925
11926     if self.op.ndparams:
11927       self.group.ndparams = self.new_ndparams
11928       result.append(("ndparams", str(self.group.ndparams)))
11929
11930     if self.op.alloc_policy:
11931       self.group.alloc_policy = self.op.alloc_policy
11932
11933     self.cfg.Update(self.group, feedback_fn)
11934     return result
11935
11936
11937
11938 class LUGroupRemove(LogicalUnit):
11939   HPATH = "group-remove"
11940   HTYPE = constants.HTYPE_GROUP
11941   REQ_BGL = False
11942
11943   def ExpandNames(self):
11944     # This will raises errors.OpPrereqError on its own:
11945     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
11946     self.needed_locks = {
11947       locking.LEVEL_NODEGROUP: [self.group_uuid],
11948       }
11949
11950   def CheckPrereq(self):
11951     """Check prerequisites.
11952
11953     This checks that the given group name exists as a node group, that is
11954     empty (i.e., contains no nodes), and that is not the last group of the
11955     cluster.
11956
11957     """
11958     # Verify that the group is empty.
11959     group_nodes = [node.name
11960                    for node in self.cfg.GetAllNodesInfo().values()
11961                    if node.group == self.group_uuid]
11962
11963     if group_nodes:
11964       raise errors.OpPrereqError("Group '%s' not empty, has the following"
11965                                  " nodes: %s" %
11966                                  (self.op.group_name,
11967                                   utils.CommaJoin(utils.NiceSort(group_nodes))),
11968                                  errors.ECODE_STATE)
11969
11970     # Verify the cluster would not be left group-less.
11971     if len(self.cfg.GetNodeGroupList()) == 1:
11972       raise errors.OpPrereqError("Group '%s' is the only group,"
11973                                  " cannot be removed" %
11974                                  self.op.group_name,
11975                                  errors.ECODE_STATE)
11976
11977   def BuildHooksEnv(self):
11978     """Build hooks env.
11979
11980     """
11981     return {
11982       "GROUP_NAME": self.op.group_name,
11983       }
11984
11985   def BuildHooksNodes(self):
11986     """Build hooks nodes.
11987
11988     """
11989     mn = self.cfg.GetMasterNode()
11990     return ([mn], [mn])
11991
11992   def Exec(self, feedback_fn):
11993     """Remove the node group.
11994
11995     """
11996     try:
11997       self.cfg.RemoveNodeGroup(self.group_uuid)
11998     except errors.ConfigurationError:
11999       raise errors.OpExecError("Group '%s' with UUID %s disappeared" %
12000                                (self.op.group_name, self.group_uuid))
12001
12002     self.remove_locks[locking.LEVEL_NODEGROUP] = self.group_uuid
12003
12004
12005 class LUGroupRename(LogicalUnit):
12006   HPATH = "group-rename"
12007   HTYPE = constants.HTYPE_GROUP
12008   REQ_BGL = False
12009
12010   def ExpandNames(self):
12011     # This raises errors.OpPrereqError on its own:
12012     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12013
12014     self.needed_locks = {
12015       locking.LEVEL_NODEGROUP: [self.group_uuid],
12016       }
12017
12018   def CheckPrereq(self):
12019     """Check prerequisites.
12020
12021     Ensures requested new name is not yet used.
12022
12023     """
12024     try:
12025       new_name_uuid = self.cfg.LookupNodeGroup(self.op.new_name)
12026     except errors.OpPrereqError:
12027       pass
12028     else:
12029       raise errors.OpPrereqError("Desired new name '%s' clashes with existing"
12030                                  " node group (UUID: %s)" %
12031                                  (self.op.new_name, new_name_uuid),
12032                                  errors.ECODE_EXISTS)
12033
12034   def BuildHooksEnv(self):
12035     """Build hooks env.
12036
12037     """
12038     return {
12039       "OLD_NAME": self.op.group_name,
12040       "NEW_NAME": self.op.new_name,
12041       }
12042
12043   def BuildHooksNodes(self):
12044     """Build hooks nodes.
12045
12046     """
12047     mn = self.cfg.GetMasterNode()
12048
12049     all_nodes = self.cfg.GetAllNodesInfo()
12050     all_nodes.pop(mn, None)
12051
12052     run_nodes = [mn]
12053     run_nodes.extend(node.name for node in all_nodes.values()
12054                      if node.group == self.group_uuid)
12055
12056     return (run_nodes, run_nodes)
12057
12058   def Exec(self, feedback_fn):
12059     """Rename the node group.
12060
12061     """
12062     group = self.cfg.GetNodeGroup(self.group_uuid)
12063
12064     if group is None:
12065       raise errors.OpExecError("Could not retrieve group '%s' (UUID: %s)" %
12066                                (self.op.group_name, self.group_uuid))
12067
12068     group.name = self.op.new_name
12069     self.cfg.Update(group, feedback_fn)
12070
12071     return self.op.new_name
12072
12073
12074 class LUGroupEvacuate(LogicalUnit):
12075   HPATH = "group-evacuate"
12076   HTYPE = constants.HTYPE_GROUP
12077   REQ_BGL = False
12078
12079   def ExpandNames(self):
12080     # This raises errors.OpPrereqError on its own:
12081     self.group_uuid = self.cfg.LookupNodeGroup(self.op.group_name)
12082
12083     if self.op.target_groups:
12084       self.req_target_uuids = map(self.cfg.LookupNodeGroup,
12085                                   self.op.target_groups)
12086     else:
12087       self.req_target_uuids = []
12088
12089     if self.group_uuid in self.req_target_uuids:
12090       raise errors.OpPrereqError("Group to be evacuated (%s) can not be used"
12091                                  " as a target group (targets are %s)" %
12092                                  (self.group_uuid,
12093                                   utils.CommaJoin(self.req_target_uuids)),
12094                                  errors.ECODE_INVAL)
12095
12096     self.op.iallocator = _GetDefaultIAllocator(self.cfg, self.op.iallocator)
12097
12098     self.share_locks = _ShareAll()
12099     self.needed_locks = {
12100       locking.LEVEL_INSTANCE: [],
12101       locking.LEVEL_NODEGROUP: [],
12102       locking.LEVEL_NODE: [],
12103       }
12104
12105   def DeclareLocks(self, level):
12106     if level == locking.LEVEL_INSTANCE:
12107       assert not self.needed_locks[locking.LEVEL_INSTANCE]
12108
12109       # Lock instances optimistically, needs verification once node and group
12110       # locks have been acquired
12111       self.needed_locks[locking.LEVEL_INSTANCE] = \
12112         self.cfg.GetNodeGroupInstances(self.group_uuid)
12113
12114     elif level == locking.LEVEL_NODEGROUP:
12115       assert not self.needed_locks[locking.LEVEL_NODEGROUP]
12116
12117       if self.req_target_uuids:
12118         lock_groups = set([self.group_uuid] + self.req_target_uuids)
12119
12120         # Lock all groups used by instances optimistically; this requires going
12121         # via the node before it's locked, requiring verification later on
12122         lock_groups.update(group_uuid
12123                            for instance_name in
12124                              self.glm.list_owned(locking.LEVEL_INSTANCE)
12125                            for group_uuid in
12126                              self.cfg.GetInstanceNodeGroups(instance_name))
12127       else:
12128         # No target groups, need to lock all of them
12129         lock_groups = locking.ALL_SET
12130
12131       self.needed_locks[locking.LEVEL_NODEGROUP] = lock_groups
12132
12133     elif level == locking.LEVEL_NODE:
12134       # This will only lock the nodes in the group to be evacuated which
12135       # contain actual instances
12136       self.recalculate_locks[locking.LEVEL_NODE] = constants.LOCKS_APPEND
12137       self._LockInstancesNodes()
12138
12139       # Lock all nodes in group to be evacuated
12140       assert self.group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
12141       member_nodes = self.cfg.GetNodeGroup(self.group_uuid).members
12142       self.needed_locks[locking.LEVEL_NODE].extend(member_nodes)
12143
12144   def CheckPrereq(self):
12145     owned_instances = frozenset(self.glm.list_owned(locking.LEVEL_INSTANCE))
12146     owned_groups = frozenset(self.glm.list_owned(locking.LEVEL_NODEGROUP))
12147     owned_nodes = frozenset(self.glm.list_owned(locking.LEVEL_NODE))
12148
12149     assert owned_groups.issuperset(self.req_target_uuids)
12150     assert self.group_uuid in owned_groups
12151
12152     # Check if locked instances are still correct
12153     wanted_instances = self.cfg.GetNodeGroupInstances(self.group_uuid)
12154     if owned_instances != wanted_instances:
12155       raise errors.OpPrereqError("Instances in node group to be evacuated (%s)"
12156                                  " changed since locks were acquired, wanted"
12157                                  " %s, have %s; retry the operation" %
12158                                  (self.group_uuid,
12159                                   utils.CommaJoin(wanted_instances),
12160                                   utils.CommaJoin(owned_instances)),
12161                                  errors.ECODE_STATE)
12162
12163     # Get instance information
12164     self.instances = dict(self.cfg.GetMultiInstanceInfo(owned_instances))
12165
12166     # Check if node groups for locked instances are still correct
12167     for instance_name in owned_instances:
12168       inst = self.instances[instance_name]
12169       assert self.group_uuid in self.cfg.GetInstanceNodeGroups(instance_name), \
12170         "Instance %s has no node in group %s" % (instance_name, self.group_uuid)
12171       assert owned_nodes.issuperset(inst.all_nodes), \
12172         "Instance %s's nodes changed while we kept the lock" % instance_name
12173
12174       inst_groups = self.cfg.GetInstanceNodeGroups(instance_name)
12175       if not owned_groups.issuperset(inst_groups):
12176         raise errors.OpPrereqError("Instance %s's node groups changed since"
12177                                    " locks were acquired, current groups"
12178                                    " are '%s', owning groups '%s'; retry the"
12179                                    " operation" %
12180                                    (instance_name,
12181                                     utils.CommaJoin(inst_groups),
12182                                     utils.CommaJoin(owned_groups)),
12183                                    errors.ECODE_STATE)
12184
12185     if self.req_target_uuids:
12186       # User requested specific target groups
12187       self.target_uuids = self.req_target_uuids
12188     else:
12189       # All groups except the one to be evacuated are potential targets
12190       self.target_uuids = [group_uuid for group_uuid in owned_groups
12191                            if group_uuid != self.group_uuid]
12192
12193       if not self.target_uuids:
12194         raise errors.OpPrereqError("There are no possible target groups",
12195                                    errors.ECODE_INVAL)
12196
12197   def BuildHooksEnv(self):
12198     """Build hooks env.
12199
12200     """
12201     return {
12202       "GROUP_NAME": self.op.group_name,
12203       "TARGET_GROUPS": " ".join(self.target_uuids),
12204       }
12205
12206   def BuildHooksNodes(self):
12207     """Build hooks nodes.
12208
12209     """
12210     mn = self.cfg.GetMasterNode()
12211
12212     assert self.group_uuid in self.glm.list_owned(locking.LEVEL_NODEGROUP)
12213
12214     run_nodes = [mn] + self.cfg.GetNodeGroup(self.group_uuid).members
12215
12216     return (run_nodes, run_nodes)
12217
12218   def Exec(self, feedback_fn):
12219     instances = list(self.glm.list_owned(locking.LEVEL_INSTANCE))
12220
12221     assert self.group_uuid not in self.target_uuids
12222
12223     ial = IAllocator(self.cfg, self.rpc, constants.IALLOCATOR_MODE_CHG_GROUP,
12224                      instances=instances, target_groups=self.target_uuids)
12225
12226     ial.Run(self.op.iallocator)
12227
12228     if not ial.success:
12229       raise errors.OpPrereqError("Can't compute group evacuation using"
12230                                  " iallocator '%s': %s" %
12231                                  (self.op.iallocator, ial.info),
12232                                  errors.ECODE_NORES)
12233
12234     jobs = _LoadNodeEvacResult(self, ial.result, self.op.early_release, False)
12235
12236     self.LogInfo("Iallocator returned %s job(s) for evacuating node group %s",
12237                  len(jobs), self.op.group_name)
12238
12239     return ResultWithJobs(jobs)
12240
12241
12242 class TagsLU(NoHooksLU): # pylint: disable-msg=W0223
12243   """Generic tags LU.
12244
12245   This is an abstract class which is the parent of all the other tags LUs.
12246
12247   """
12248   def ExpandNames(self):
12249     self.group_uuid = None
12250     self.needed_locks = {}
12251     if self.op.kind == constants.TAG_NODE:
12252       self.op.name = _ExpandNodeName(self.cfg, self.op.name)
12253       self.needed_locks[locking.LEVEL_NODE] = self.op.name
12254     elif self.op.kind == constants.TAG_INSTANCE:
12255       self.op.name = _ExpandInstanceName(self.cfg, self.op.name)
12256       self.needed_locks[locking.LEVEL_INSTANCE] = self.op.name
12257     elif self.op.kind == constants.TAG_NODEGROUP:
12258       self.group_uuid = self.cfg.LookupNodeGroup(self.op.name)
12259
12260     # FIXME: Acquire BGL for cluster tag operations (as of this writing it's
12261     # not possible to acquire the BGL based on opcode parameters)
12262
12263   def CheckPrereq(self):
12264     """Check prerequisites.
12265
12266     """
12267     if self.op.kind == constants.TAG_CLUSTER:
12268       self.target = self.cfg.GetClusterInfo()
12269     elif self.op.kind == constants.TAG_NODE:
12270       self.target = self.cfg.GetNodeInfo(self.op.name)
12271     elif self.op.kind == constants.TAG_INSTANCE:
12272       self.target = self.cfg.GetInstanceInfo(self.op.name)
12273     elif self.op.kind == constants.TAG_NODEGROUP:
12274       self.target = self.cfg.GetNodeGroup(self.group_uuid)
12275     else:
12276       raise errors.OpPrereqError("Wrong tag type requested (%s)" %
12277                                  str(self.op.kind), errors.ECODE_INVAL)
12278
12279
12280 class LUTagsGet(TagsLU):
12281   """Returns the tags of a given object.
12282
12283   """
12284   REQ_BGL = False
12285
12286   def ExpandNames(self):
12287     TagsLU.ExpandNames(self)
12288
12289     # Share locks as this is only a read operation
12290     self.share_locks = _ShareAll()
12291
12292   def Exec(self, feedback_fn):
12293     """Returns the tag list.
12294
12295     """
12296     return list(self.target.GetTags())
12297
12298
12299 class LUTagsSearch(NoHooksLU):
12300   """Searches the tags for a given pattern.
12301
12302   """
12303   REQ_BGL = False
12304
12305   def ExpandNames(self):
12306     self.needed_locks = {}
12307
12308   def CheckPrereq(self):
12309     """Check prerequisites.
12310
12311     This checks the pattern passed for validity by compiling it.
12312
12313     """
12314     try:
12315       self.re = re.compile(self.op.pattern)
12316     except re.error, err:
12317       raise errors.OpPrereqError("Invalid search pattern '%s': %s" %
12318                                  (self.op.pattern, err), errors.ECODE_INVAL)
12319
12320   def Exec(self, feedback_fn):
12321     """Returns the tag list.
12322
12323     """
12324     cfg = self.cfg
12325     tgts = [("/cluster", cfg.GetClusterInfo())]
12326     ilist = cfg.GetAllInstancesInfo().values()
12327     tgts.extend([("/instances/%s" % i.name, i) for i in ilist])
12328     nlist = cfg.GetAllNodesInfo().values()
12329     tgts.extend([("/nodes/%s" % n.name, n) for n in nlist])
12330     tgts.extend(("/nodegroup/%s" % n.name, n)
12331                 for n in cfg.GetAllNodeGroupsInfo().values())
12332     results = []
12333     for path, target in tgts:
12334       for tag in target.GetTags():
12335         if self.re.search(tag):
12336           results.append((path, tag))
12337     return results
12338
12339
12340 class LUTagsSet(TagsLU):
12341   """Sets a tag on a given object.
12342
12343   """
12344   REQ_BGL = False
12345
12346   def CheckPrereq(self):
12347     """Check prerequisites.
12348
12349     This checks the type and length of the tag name and value.
12350
12351     """
12352     TagsLU.CheckPrereq(self)
12353     for tag in self.op.tags:
12354       objects.TaggableObject.ValidateTag(tag)
12355
12356   def Exec(self, feedback_fn):
12357     """Sets the tag.
12358
12359     """
12360     try:
12361       for tag in self.op.tags:
12362         self.target.AddTag(tag)
12363     except errors.TagError, err:
12364       raise errors.OpExecError("Error while setting tag: %s" % str(err))
12365     self.cfg.Update(self.target, feedback_fn)
12366
12367
12368 class LUTagsDel(TagsLU):
12369   """Delete a list of tags from a given object.
12370
12371   """
12372   REQ_BGL = False
12373
12374   def CheckPrereq(self):
12375     """Check prerequisites.
12376
12377     This checks that we have the given tag.
12378
12379     """
12380     TagsLU.CheckPrereq(self)
12381     for tag in self.op.tags:
12382       objects.TaggableObject.ValidateTag(tag)
12383     del_tags = frozenset(self.op.tags)
12384     cur_tags = self.target.GetTags()
12385
12386     diff_tags = del_tags - cur_tags
12387     if diff_tags:
12388       diff_names = ("'%s'" % i for i in sorted(diff_tags))
12389       raise errors.OpPrereqError("Tag(s) %s not found" %
12390                                  (utils.CommaJoin(diff_names), ),
12391                                  errors.ECODE_NOENT)
12392
12393   def Exec(self, feedback_fn):
12394     """Remove the tag from the object.
12395
12396     """
12397     for tag in self.op.tags:
12398       self.target.RemoveTag(tag)
12399     self.cfg.Update(self.target, feedback_fn)
12400
12401
12402 class LUTestDelay(NoHooksLU):
12403   """Sleep for a specified amount of time.
12404
12405   This LU sleeps on the master and/or nodes for a specified amount of
12406   time.
12407
12408   """
12409   REQ_BGL = False
12410
12411   def ExpandNames(self):
12412     """Expand names and set required locks.
12413
12414     This expands the node list, if any.
12415
12416     """
12417     self.needed_locks = {}
12418     if self.op.on_nodes:
12419       # _GetWantedNodes can be used here, but is not always appropriate to use
12420       # this way in ExpandNames. Check LogicalUnit.ExpandNames docstring for
12421       # more information.
12422       self.op.on_nodes = _GetWantedNodes(self, self.op.on_nodes)
12423       self.needed_locks[locking.LEVEL_NODE] = self.op.on_nodes
12424
12425   def _TestDelay(self):
12426     """Do the actual sleep.
12427
12428     """
12429     if self.op.on_master:
12430       if not utils.TestDelay(self.op.duration):
12431         raise errors.OpExecError("Error during master delay test")
12432     if self.op.on_nodes:
12433       result = self.rpc.call_test_delay(self.op.on_nodes, self.op.duration)
12434       for node, node_result in result.items():
12435         node_result.Raise("Failure during rpc call to node %s" % node)
12436
12437   def Exec(self, feedback_fn):
12438     """Execute the test delay opcode, with the wanted repetitions.
12439
12440     """
12441     if self.op.repeat == 0:
12442       self._TestDelay()
12443     else:
12444       top_value = self.op.repeat - 1
12445       for i in range(self.op.repeat):
12446         self.LogInfo("Test delay iteration %d/%d" % (i, top_value))
12447         self._TestDelay()
12448
12449
12450 class LUTestJqueue(NoHooksLU):
12451   """Utility LU to test some aspects of the job queue.
12452
12453   """
12454   REQ_BGL = False
12455
12456   # Must be lower than default timeout for WaitForJobChange to see whether it
12457   # notices changed jobs
12458   _CLIENT_CONNECT_TIMEOUT = 20.0
12459   _CLIENT_CONFIRM_TIMEOUT = 60.0
12460
12461   @classmethod
12462   def _NotifyUsingSocket(cls, cb, errcls):
12463     """Opens a Unix socket and waits for another program to connect.
12464
12465     @type cb: callable
12466     @param cb: Callback to send socket name to client
12467     @type errcls: class
12468     @param errcls: Exception class to use for errors
12469
12470     """
12471     # Using a temporary directory as there's no easy way to create temporary
12472     # sockets without writing a custom loop around tempfile.mktemp and
12473     # socket.bind
12474     tmpdir = tempfile.mkdtemp()
12475     try:
12476       tmpsock = utils.PathJoin(tmpdir, "sock")
12477
12478       logging.debug("Creating temporary socket at %s", tmpsock)
12479       sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
12480       try:
12481         sock.bind(tmpsock)
12482         sock.listen(1)
12483
12484         # Send details to client
12485         cb(tmpsock)
12486
12487         # Wait for client to connect before continuing
12488         sock.settimeout(cls._CLIENT_CONNECT_TIMEOUT)
12489         try:
12490           (conn, _) = sock.accept()
12491         except socket.error, err:
12492           raise errcls("Client didn't connect in time (%s)" % err)
12493       finally:
12494         sock.close()
12495     finally:
12496       # Remove as soon as client is connected
12497       shutil.rmtree(tmpdir)
12498
12499     # Wait for client to close
12500     try:
12501       try:
12502         # pylint: disable-msg=E1101
12503         # Instance of '_socketobject' has no ... member
12504         conn.settimeout(cls._CLIENT_CONFIRM_TIMEOUT)
12505         conn.recv(1)
12506       except socket.error, err:
12507         raise errcls("Client failed to confirm notification (%s)" % err)
12508     finally:
12509       conn.close()
12510
12511   def _SendNotification(self, test, arg, sockname):
12512     """Sends a notification to the client.
12513
12514     @type test: string
12515     @param test: Test name
12516     @param arg: Test argument (depends on test)
12517     @type sockname: string
12518     @param sockname: Socket path
12519
12520     """
12521     self.Log(constants.ELOG_JQUEUE_TEST, (sockname, test, arg))
12522
12523   def _Notify(self, prereq, test, arg):
12524     """Notifies the client of a test.
12525
12526     @type prereq: bool
12527     @param prereq: Whether this is a prereq-phase test
12528     @type test: string
12529     @param test: Test name
12530     @param arg: Test argument (depends on test)
12531
12532     """
12533     if prereq:
12534       errcls = errors.OpPrereqError
12535     else:
12536       errcls = errors.OpExecError
12537
12538     return self._NotifyUsingSocket(compat.partial(self._SendNotification,
12539                                                   test, arg),
12540                                    errcls)
12541
12542   def CheckArguments(self):
12543     self.checkargs_calls = getattr(self, "checkargs_calls", 0) + 1
12544     self.expandnames_calls = 0
12545
12546   def ExpandNames(self):
12547     checkargs_calls = getattr(self, "checkargs_calls", 0)
12548     if checkargs_calls < 1:
12549       raise errors.ProgrammerError("CheckArguments was not called")
12550
12551     self.expandnames_calls += 1
12552
12553     if self.op.notify_waitlock:
12554       self._Notify(True, constants.JQT_EXPANDNAMES, None)
12555
12556     self.LogInfo("Expanding names")
12557
12558     # Get lock on master node (just to get a lock, not for a particular reason)
12559     self.needed_locks = {
12560       locking.LEVEL_NODE: self.cfg.GetMasterNode(),
12561       }
12562
12563   def Exec(self, feedback_fn):
12564     if self.expandnames_calls < 1:
12565       raise errors.ProgrammerError("ExpandNames was not called")
12566
12567     if self.op.notify_exec:
12568       self._Notify(False, constants.JQT_EXEC, None)
12569
12570     self.LogInfo("Executing")
12571
12572     if self.op.log_messages:
12573       self._Notify(False, constants.JQT_STARTMSG, len(self.op.log_messages))
12574       for idx, msg in enumerate(self.op.log_messages):
12575         self.LogInfo("Sending log message %s", idx + 1)
12576         feedback_fn(constants.JQT_MSGPREFIX + msg)
12577         # Report how many test messages have been sent
12578         self._Notify(False, constants.JQT_LOGMSG, idx + 1)
12579
12580     if self.op.fail:
12581       raise errors.OpExecError("Opcode failure was requested")
12582
12583     return True
12584
12585
12586 class IAllocator(object):
12587   """IAllocator framework.
12588
12589   An IAllocator instance has three sets of attributes:
12590     - cfg that is needed to query the cluster
12591     - input data (all members of the _KEYS class attribute are required)
12592     - four buffer attributes (in|out_data|text), that represent the
12593       input (to the external script) in text and data structure format,
12594       and the output from it, again in two formats
12595     - the result variables from the script (success, info, nodes) for
12596       easy usage
12597
12598   """
12599   # pylint: disable-msg=R0902
12600   # lots of instance attributes
12601
12602   def __init__(self, cfg, rpc, mode, **kwargs):
12603     self.cfg = cfg
12604     self.rpc = rpc
12605     # init buffer variables
12606     self.in_text = self.out_text = self.in_data = self.out_data = None
12607     # init all input fields so that pylint is happy
12608     self.mode = mode
12609     self.memory = self.disks = self.disk_template = None
12610     self.os = self.tags = self.nics = self.vcpus = None
12611     self.hypervisor = None
12612     self.relocate_from = None
12613     self.name = None
12614     self.evac_nodes = None
12615     self.instances = None
12616     self.evac_mode = None
12617     self.target_groups = []
12618     # computed fields
12619     self.required_nodes = None
12620     # init result fields
12621     self.success = self.info = self.result = None
12622
12623     try:
12624       (fn, keydata, self._result_check) = self._MODE_DATA[self.mode]
12625     except KeyError:
12626       raise errors.ProgrammerError("Unknown mode '%s' passed to the"
12627                                    " IAllocator" % self.mode)
12628
12629     keyset = [n for (n, _) in keydata]
12630
12631     for key in kwargs:
12632       if key not in keyset:
12633         raise errors.ProgrammerError("Invalid input parameter '%s' to"
12634                                      " IAllocator" % key)
12635       setattr(self, key, kwargs[key])
12636
12637     for key in keyset:
12638       if key not in kwargs:
12639         raise errors.ProgrammerError("Missing input parameter '%s' to"
12640                                      " IAllocator" % key)
12641     self._BuildInputData(compat.partial(fn, self), keydata)
12642
12643   def _ComputeClusterData(self):
12644     """Compute the generic allocator input data.
12645
12646     This is the data that is independent of the actual operation.
12647
12648     """
12649     cfg = self.cfg
12650     cluster_info = cfg.GetClusterInfo()
12651     # cluster data
12652     data = {
12653       "version": constants.IALLOCATOR_VERSION,
12654       "cluster_name": cfg.GetClusterName(),
12655       "cluster_tags": list(cluster_info.GetTags()),
12656       "enabled_hypervisors": list(cluster_info.enabled_hypervisors),
12657       # we don't have job IDs
12658       }
12659     ninfo = cfg.GetAllNodesInfo()
12660     iinfo = cfg.GetAllInstancesInfo().values()
12661     i_list = [(inst, cluster_info.FillBE(inst)) for inst in iinfo]
12662
12663     # node data
12664     node_list = [n.name for n in ninfo.values() if n.vm_capable]
12665
12666     if self.mode == constants.IALLOCATOR_MODE_ALLOC:
12667       hypervisor_name = self.hypervisor
12668     elif self.mode == constants.IALLOCATOR_MODE_RELOC:
12669       hypervisor_name = cfg.GetInstanceInfo(self.name).hypervisor
12670     else:
12671       hypervisor_name = cluster_info.enabled_hypervisors[0]
12672
12673     node_data = self.rpc.call_node_info(node_list, cfg.GetVGName(),
12674                                         hypervisor_name)
12675     node_iinfo = \
12676       self.rpc.call_all_instances_info(node_list,
12677                                        cluster_info.enabled_hypervisors)
12678
12679     data["nodegroups"] = self._ComputeNodeGroupData(cfg)
12680
12681     config_ndata = self._ComputeBasicNodeData(ninfo)
12682     data["nodes"] = self._ComputeDynamicNodeData(ninfo, node_data, node_iinfo,
12683                                                  i_list, config_ndata)
12684     assert len(data["nodes"]) == len(ninfo), \
12685         "Incomplete node data computed"
12686
12687     data["instances"] = self._ComputeInstanceData(cluster_info, i_list)
12688
12689     self.in_data = data
12690
12691   @staticmethod
12692   def _ComputeNodeGroupData(cfg):
12693     """Compute node groups data.
12694
12695     """
12696     ng = dict((guuid, {
12697       "name": gdata.name,
12698       "alloc_policy": gdata.alloc_policy,
12699       })
12700       for guuid, gdata in cfg.GetAllNodeGroupsInfo().items())
12701
12702     return ng
12703
12704   @staticmethod
12705   def _ComputeBasicNodeData(node_cfg):
12706     """Compute global node data.
12707
12708     @rtype: dict
12709     @returns: a dict of name: (node dict, node config)
12710
12711     """
12712     # fill in static (config-based) values
12713     node_results = dict((ninfo.name, {
12714       "tags": list(ninfo.GetTags()),
12715       "primary_ip": ninfo.primary_ip,
12716       "secondary_ip": ninfo.secondary_ip,
12717       "offline": ninfo.offline,
12718       "drained": ninfo.drained,
12719       "master_candidate": ninfo.master_candidate,
12720       "group": ninfo.group,
12721       "master_capable": ninfo.master_capable,
12722       "vm_capable": ninfo.vm_capable,
12723       })
12724       for ninfo in node_cfg.values())
12725
12726     return node_results
12727
12728   @staticmethod
12729   def _ComputeDynamicNodeData(node_cfg, node_data, node_iinfo, i_list,
12730                               node_results):
12731     """Compute global node data.
12732
12733     @param node_results: the basic node structures as filled from the config
12734
12735     """
12736     # make a copy of the current dict
12737     node_results = dict(node_results)
12738     for nname, nresult in node_data.items():
12739       assert nname in node_results, "Missing basic data for node %s" % nname
12740       ninfo = node_cfg[nname]
12741
12742       if not (ninfo.offline or ninfo.drained):
12743         nresult.Raise("Can't get data for node %s" % nname)
12744         node_iinfo[nname].Raise("Can't get node instance info from node %s" %
12745                                 nname)
12746         remote_info = nresult.payload
12747
12748         for attr in ["memory_total", "memory_free", "memory_dom0",
12749                      "vg_size", "vg_free", "cpu_total"]:
12750           if attr not in remote_info:
12751             raise errors.OpExecError("Node '%s' didn't return attribute"
12752                                      " '%s'" % (nname, attr))
12753           if not isinstance(remote_info[attr], int):
12754             raise errors.OpExecError("Node '%s' returned invalid value"
12755                                      " for '%s': %s" %
12756                                      (nname, attr, remote_info[attr]))
12757         # compute memory used by primary instances
12758         i_p_mem = i_p_up_mem = 0
12759         for iinfo, beinfo in i_list:
12760           if iinfo.primary_node == nname:
12761             i_p_mem += beinfo[constants.BE_MEMORY]
12762             if iinfo.name not in node_iinfo[nname].payload:
12763               i_used_mem = 0
12764             else:
12765               i_used_mem = int(node_iinfo[nname].payload[iinfo.name]["memory"])
12766             i_mem_diff = beinfo[constants.BE_MEMORY] - i_used_mem
12767             remote_info["memory_free"] -= max(0, i_mem_diff)
12768
12769             if iinfo.admin_up:
12770               i_p_up_mem += beinfo[constants.BE_MEMORY]
12771
12772         # compute memory used by instances
12773         pnr_dyn = {
12774           "total_memory": remote_info["memory_total"],
12775           "reserved_memory": remote_info["memory_dom0"],
12776           "free_memory": remote_info["memory_free"],
12777           "total_disk": remote_info["vg_size"],
12778           "free_disk": remote_info["vg_free"],
12779           "total_cpus": remote_info["cpu_total"],
12780           "i_pri_memory": i_p_mem,
12781           "i_pri_up_memory": i_p_up_mem,
12782           }
12783         pnr_dyn.update(node_results[nname])
12784         node_results[nname] = pnr_dyn
12785
12786     return node_results
12787
12788   @staticmethod
12789   def _ComputeInstanceData(cluster_info, i_list):
12790     """Compute global instance data.
12791
12792     """
12793     instance_data = {}
12794     for iinfo, beinfo in i_list:
12795       nic_data = []
12796       for nic in iinfo.nics:
12797         filled_params = cluster_info.SimpleFillNIC(nic.nicparams)
12798         nic_dict = {
12799           "mac": nic.mac,
12800           "ip": nic.ip,
12801           "mode": filled_params[constants.NIC_MODE],
12802           "link": filled_params[constants.NIC_LINK],
12803           }
12804         if filled_params[constants.NIC_MODE] == constants.NIC_MODE_BRIDGED:
12805           nic_dict["bridge"] = filled_params[constants.NIC_LINK]
12806         nic_data.append(nic_dict)
12807       pir = {
12808         "tags": list(iinfo.GetTags()),
12809         "admin_up": iinfo.admin_up,
12810         "vcpus": beinfo[constants.BE_VCPUS],
12811         "memory": beinfo[constants.BE_MEMORY],
12812         "os": iinfo.os,
12813         "nodes": [iinfo.primary_node] + list(iinfo.secondary_nodes),
12814         "nics": nic_data,
12815         "disks": [{constants.IDISK_SIZE: dsk.size,
12816                    constants.IDISK_MODE: dsk.mode}
12817                   for dsk in iinfo.disks],
12818         "disk_template": iinfo.disk_template,
12819         "hypervisor": iinfo.hypervisor,
12820         }
12821       pir["disk_space_total"] = _ComputeDiskSize(iinfo.disk_template,
12822                                                  pir["disks"])
12823       instance_data[iinfo.name] = pir
12824
12825     return instance_data
12826
12827   def _AddNewInstance(self):
12828     """Add new instance data to allocator structure.
12829
12830     This in combination with _AllocatorGetClusterData will create the
12831     correct structure needed as input for the allocator.
12832
12833     The checks for the completeness of the opcode must have already been
12834     done.
12835
12836     """
12837     disk_space = _ComputeDiskSize(self.disk_template, self.disks)
12838
12839     if self.disk_template in constants.DTS_INT_MIRROR:
12840       self.required_nodes = 2
12841     else:
12842       self.required_nodes = 1
12843
12844     request = {
12845       "name": self.name,
12846       "disk_template": self.disk_template,
12847       "tags": self.tags,
12848       "os": self.os,
12849       "vcpus": self.vcpus,
12850       "memory": self.memory,
12851       "disks": self.disks,
12852       "disk_space_total": disk_space,
12853       "nics": self.nics,
12854       "required_nodes": self.required_nodes,
12855       "hypervisor": self.hypervisor,
12856       }
12857
12858     return request
12859
12860   def _AddRelocateInstance(self):
12861     """Add relocate instance data to allocator structure.
12862
12863     This in combination with _IAllocatorGetClusterData will create the
12864     correct structure needed as input for the allocator.
12865
12866     The checks for the completeness of the opcode must have already been
12867     done.
12868
12869     """
12870     instance = self.cfg.GetInstanceInfo(self.name)
12871     if instance is None:
12872       raise errors.ProgrammerError("Unknown instance '%s' passed to"
12873                                    " IAllocator" % self.name)
12874
12875     if instance.disk_template not in constants.DTS_MIRRORED:
12876       raise errors.OpPrereqError("Can't relocate non-mirrored instances",
12877                                  errors.ECODE_INVAL)
12878
12879     if instance.disk_template in constants.DTS_INT_MIRROR and \
12880         len(instance.secondary_nodes) != 1:
12881       raise errors.OpPrereqError("Instance has not exactly one secondary node",
12882                                  errors.ECODE_STATE)
12883
12884     self.required_nodes = 1
12885     disk_sizes = [{constants.IDISK_SIZE: disk.size} for disk in instance.disks]
12886     disk_space = _ComputeDiskSize(instance.disk_template, disk_sizes)
12887
12888     request = {
12889       "name": self.name,
12890       "disk_space_total": disk_space,
12891       "required_nodes": self.required_nodes,
12892       "relocate_from": self.relocate_from,
12893       }
12894     return request
12895
12896   def _AddEvacuateNodes(self):
12897     """Add evacuate nodes data to allocator structure.
12898
12899     """
12900     request = {
12901       "evac_nodes": self.evac_nodes
12902       }
12903     return request
12904
12905   def _AddNodeEvacuate(self):
12906     """Get data for node-evacuate requests.
12907
12908     """
12909     return {
12910       "instances": self.instances,
12911       "evac_mode": self.evac_mode,
12912       }
12913
12914   def _AddChangeGroup(self):
12915     """Get data for node-evacuate requests.
12916
12917     """
12918     return {
12919       "instances": self.instances,
12920       "target_groups": self.target_groups,
12921       }
12922
12923   def _BuildInputData(self, fn, keydata):
12924     """Build input data structures.
12925
12926     """
12927     self._ComputeClusterData()
12928
12929     request = fn()
12930     request["type"] = self.mode
12931     for keyname, keytype in keydata:
12932       if keyname not in request:
12933         raise errors.ProgrammerError("Request parameter %s is missing" %
12934                                      keyname)
12935       val = request[keyname]
12936       if not keytype(val):
12937         raise errors.ProgrammerError("Request parameter %s doesn't pass"
12938                                      " validation, value %s, expected"
12939                                      " type %s" % (keyname, val, keytype))
12940     self.in_data["request"] = request
12941
12942     self.in_text = serializer.Dump(self.in_data)
12943
12944   _STRING_LIST = ht.TListOf(ht.TString)
12945   _JOB_LIST = ht.TListOf(ht.TListOf(ht.TStrictDict(True, False, {
12946      # pylint: disable-msg=E1101
12947      # Class '...' has no 'OP_ID' member
12948      "OP_ID": ht.TElemOf([opcodes.OpInstanceFailover.OP_ID,
12949                           opcodes.OpInstanceMigrate.OP_ID,
12950                           opcodes.OpInstanceReplaceDisks.OP_ID])
12951      })))
12952
12953   _NEVAC_MOVED = \
12954     ht.TListOf(ht.TAnd(ht.TIsLength(3),
12955                        ht.TItems([ht.TNonEmptyString,
12956                                   ht.TNonEmptyString,
12957                                   ht.TListOf(ht.TNonEmptyString),
12958                                  ])))
12959   _NEVAC_FAILED = \
12960     ht.TListOf(ht.TAnd(ht.TIsLength(2),
12961                        ht.TItems([ht.TNonEmptyString,
12962                                   ht.TMaybeString,
12963                                  ])))
12964   _NEVAC_RESULT = ht.TAnd(ht.TIsLength(3),
12965                           ht.TItems([_NEVAC_MOVED, _NEVAC_FAILED, _JOB_LIST]))
12966
12967   _MODE_DATA = {
12968     constants.IALLOCATOR_MODE_ALLOC:
12969       (_AddNewInstance,
12970        [
12971         ("name", ht.TString),
12972         ("memory", ht.TInt),
12973         ("disks", ht.TListOf(ht.TDict)),
12974         ("disk_template", ht.TString),
12975         ("os", ht.TString),
12976         ("tags", _STRING_LIST),
12977         ("nics", ht.TListOf(ht.TDict)),
12978         ("vcpus", ht.TInt),
12979         ("hypervisor", ht.TString),
12980         ], ht.TList),
12981     constants.IALLOCATOR_MODE_RELOC:
12982       (_AddRelocateInstance,
12983        [("name", ht.TString), ("relocate_from", _STRING_LIST)],
12984        ht.TList),
12985     constants.IALLOCATOR_MODE_MEVAC:
12986       (_AddEvacuateNodes, [("evac_nodes", _STRING_LIST)],
12987        ht.TListOf(ht.TAnd(ht.TIsLength(2), _STRING_LIST))),
12988      constants.IALLOCATOR_MODE_NODE_EVAC:
12989       (_AddNodeEvacuate, [
12990         ("instances", _STRING_LIST),
12991         ("evac_mode", ht.TElemOf(constants.IALLOCATOR_NEVAC_MODES)),
12992         ], _NEVAC_RESULT),
12993      constants.IALLOCATOR_MODE_CHG_GROUP:
12994       (_AddChangeGroup, [
12995         ("instances", _STRING_LIST),
12996         ("target_groups", _STRING_LIST),
12997         ], _NEVAC_RESULT),
12998     }
12999
13000   def Run(self, name, validate=True, call_fn=None):
13001     """Run an instance allocator and return the results.
13002
13003     """
13004     if call_fn is None:
13005       call_fn = self.rpc.call_iallocator_runner
13006
13007     result = call_fn(self.cfg.GetMasterNode(), name, self.in_text)
13008     result.Raise("Failure while running the iallocator script")
13009
13010     self.out_text = result.payload
13011     if validate:
13012       self._ValidateResult()
13013
13014   def _ValidateResult(self):
13015     """Process the allocator results.
13016
13017     This will process and if successful save the result in
13018     self.out_data and the other parameters.
13019
13020     """
13021     try:
13022       rdict = serializer.Load(self.out_text)
13023     except Exception, err:
13024       raise errors.OpExecError("Can't parse iallocator results: %s" % str(err))
13025
13026     if not isinstance(rdict, dict):
13027       raise errors.OpExecError("Can't parse iallocator results: not a dict")
13028
13029     # TODO: remove backwards compatiblity in later versions
13030     if "nodes" in rdict and "result" not in rdict:
13031       rdict["result"] = rdict["nodes"]
13032       del rdict["nodes"]
13033
13034     for key in "success", "info", "result":
13035       if key not in rdict:
13036         raise errors.OpExecError("Can't parse iallocator results:"
13037                                  " missing key '%s'" % key)
13038       setattr(self, key, rdict[key])
13039
13040     if not self._result_check(self.result):
13041       raise errors.OpExecError("Iallocator returned invalid result,"
13042                                " expected %s, got %s" %
13043                                (self._result_check, self.result),
13044                                errors.ECODE_INVAL)
13045
13046     if self.mode in (constants.IALLOCATOR_MODE_RELOC,
13047                      constants.IALLOCATOR_MODE_MEVAC):
13048       node2group = dict((name, ndata["group"])
13049                         for (name, ndata) in self.in_data["nodes"].items())
13050
13051       fn = compat.partial(self._NodesToGroups, node2group,
13052                           self.in_data["nodegroups"])
13053
13054       if self.mode == constants.IALLOCATOR_MODE_RELOC:
13055         assert self.relocate_from is not None
13056         assert self.required_nodes == 1
13057
13058         request_groups = fn(self.relocate_from)
13059         result_groups = fn(rdict["result"])
13060
13061         if result_groups != request_groups:
13062           raise errors.OpExecError("Groups of nodes returned by iallocator (%s)"
13063                                    " differ from original groups (%s)" %
13064                                    (utils.CommaJoin(result_groups),
13065                                     utils.CommaJoin(request_groups)))
13066       elif self.mode == constants.IALLOCATOR_MODE_MEVAC:
13067         request_groups = fn(self.evac_nodes)
13068         for (instance_name, secnode) in self.result:
13069           result_groups = fn([secnode])
13070           if result_groups != request_groups:
13071             raise errors.OpExecError("Iallocator returned new secondary node"
13072                                      " '%s' (group '%s') for instance '%s'"
13073                                      " which is not in original group '%s'" %
13074                                      (secnode, utils.CommaJoin(result_groups),
13075                                       instance_name,
13076                                       utils.CommaJoin(request_groups)))
13077       else:
13078         raise errors.ProgrammerError("Unhandled mode '%s'" % self.mode)
13079
13080     elif self.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13081       assert self.evac_mode in constants.IALLOCATOR_NEVAC_MODES
13082
13083     self.out_data = rdict
13084
13085   @staticmethod
13086   def _NodesToGroups(node2group, groups, nodes):
13087     """Returns a list of unique group names for a list of nodes.
13088
13089     @type node2group: dict
13090     @param node2group: Map from node name to group UUID
13091     @type groups: dict
13092     @param groups: Group information
13093     @type nodes: list
13094     @param nodes: Node names
13095
13096     """
13097     result = set()
13098
13099     for node in nodes:
13100       try:
13101         group_uuid = node2group[node]
13102       except KeyError:
13103         # Ignore unknown node
13104         pass
13105       else:
13106         try:
13107           group = groups[group_uuid]
13108         except KeyError:
13109           # Can't find group, let's use UUID
13110           group_name = group_uuid
13111         else:
13112           group_name = group["name"]
13113
13114         result.add(group_name)
13115
13116     return sorted(result)
13117
13118
13119 class LUTestAllocator(NoHooksLU):
13120   """Run allocator tests.
13121
13122   This LU runs the allocator tests
13123
13124   """
13125   def CheckPrereq(self):
13126     """Check prerequisites.
13127
13128     This checks the opcode parameters depending on the director and mode test.
13129
13130     """
13131     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13132       for attr in ["memory", "disks", "disk_template",
13133                    "os", "tags", "nics", "vcpus"]:
13134         if not hasattr(self.op, attr):
13135           raise errors.OpPrereqError("Missing attribute '%s' on opcode input" %
13136                                      attr, errors.ECODE_INVAL)
13137       iname = self.cfg.ExpandInstanceName(self.op.name)
13138       if iname is not None:
13139         raise errors.OpPrereqError("Instance '%s' already in the cluster" %
13140                                    iname, errors.ECODE_EXISTS)
13141       if not isinstance(self.op.nics, list):
13142         raise errors.OpPrereqError("Invalid parameter 'nics'",
13143                                    errors.ECODE_INVAL)
13144       if not isinstance(self.op.disks, list):
13145         raise errors.OpPrereqError("Invalid parameter 'disks'",
13146                                    errors.ECODE_INVAL)
13147       for row in self.op.disks:
13148         if (not isinstance(row, dict) or
13149             constants.IDISK_SIZE not in row or
13150             not isinstance(row[constants.IDISK_SIZE], int) or
13151             constants.IDISK_MODE not in row or
13152             row[constants.IDISK_MODE] not in constants.DISK_ACCESS_SET):
13153           raise errors.OpPrereqError("Invalid contents of the 'disks'"
13154                                      " parameter", errors.ECODE_INVAL)
13155       if self.op.hypervisor is None:
13156         self.op.hypervisor = self.cfg.GetHypervisorType()
13157     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13158       fname = _ExpandInstanceName(self.cfg, self.op.name)
13159       self.op.name = fname
13160       self.relocate_from = \
13161           list(self.cfg.GetInstanceInfo(fname).secondary_nodes)
13162     elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
13163       if not hasattr(self.op, "evac_nodes"):
13164         raise errors.OpPrereqError("Missing attribute 'evac_nodes' on"
13165                                    " opcode input", errors.ECODE_INVAL)
13166     elif self.op.mode in (constants.IALLOCATOR_MODE_CHG_GROUP,
13167                           constants.IALLOCATOR_MODE_NODE_EVAC):
13168       if not self.op.instances:
13169         raise errors.OpPrereqError("Missing instances", errors.ECODE_INVAL)
13170       self.op.instances = _GetWantedInstances(self, self.op.instances)
13171     else:
13172       raise errors.OpPrereqError("Invalid test allocator mode '%s'" %
13173                                  self.op.mode, errors.ECODE_INVAL)
13174
13175     if self.op.direction == constants.IALLOCATOR_DIR_OUT:
13176       if self.op.allocator is None:
13177         raise errors.OpPrereqError("Missing allocator name",
13178                                    errors.ECODE_INVAL)
13179     elif self.op.direction != constants.IALLOCATOR_DIR_IN:
13180       raise errors.OpPrereqError("Wrong allocator test '%s'" %
13181                                  self.op.direction, errors.ECODE_INVAL)
13182
13183   def Exec(self, feedback_fn):
13184     """Run the allocator test.
13185
13186     """
13187     if self.op.mode == constants.IALLOCATOR_MODE_ALLOC:
13188       ial = IAllocator(self.cfg, self.rpc,
13189                        mode=self.op.mode,
13190                        name=self.op.name,
13191                        memory=self.op.memory,
13192                        disks=self.op.disks,
13193                        disk_template=self.op.disk_template,
13194                        os=self.op.os,
13195                        tags=self.op.tags,
13196                        nics=self.op.nics,
13197                        vcpus=self.op.vcpus,
13198                        hypervisor=self.op.hypervisor,
13199                        )
13200     elif self.op.mode == constants.IALLOCATOR_MODE_RELOC:
13201       ial = IAllocator(self.cfg, self.rpc,
13202                        mode=self.op.mode,
13203                        name=self.op.name,
13204                        relocate_from=list(self.relocate_from),
13205                        )
13206     elif self.op.mode == constants.IALLOCATOR_MODE_MEVAC:
13207       ial = IAllocator(self.cfg, self.rpc,
13208                        mode=self.op.mode,
13209                        evac_nodes=self.op.evac_nodes)
13210     elif self.op.mode == constants.IALLOCATOR_MODE_CHG_GROUP:
13211       ial = IAllocator(self.cfg, self.rpc,
13212                        mode=self.op.mode,
13213                        instances=self.op.instances,
13214                        target_groups=self.op.target_groups)
13215     elif self.op.mode == constants.IALLOCATOR_MODE_NODE_EVAC:
13216       ial = IAllocator(self.cfg, self.rpc,
13217                        mode=self.op.mode,
13218                        instances=self.op.instances,
13219                        evac_mode=self.op.evac_mode)
13220     else:
13221       raise errors.ProgrammerError("Uncatched mode %s in"
13222                                    " LUTestAllocator.Exec", self.op.mode)
13223
13224     if self.op.direction == constants.IALLOCATOR_DIR_IN:
13225       result = ial.in_text
13226     else:
13227       ial.Run(self.op.allocator, validate=False)
13228       result = ial.out_text
13229     return result
13230
13231
13232 #: Query type implementations
13233 _QUERY_IMPL = {
13234   constants.QR_INSTANCE: _InstanceQuery,
13235   constants.QR_NODE: _NodeQuery,
13236   constants.QR_GROUP: _GroupQuery,
13237   constants.QR_OS: _OsQuery,
13238   }
13239
13240 assert set(_QUERY_IMPL.keys()) == constants.QR_VIA_OP
13241
13242
13243 def _GetQueryImplementation(name):
13244   """Returns the implemtnation for a query type.
13245
13246   @param name: Query type, must be one of L{constants.QR_VIA_OP}
13247
13248   """
13249   try:
13250     return _QUERY_IMPL[name]
13251   except KeyError:
13252     raise errors.OpPrereqError("Unknown query resource '%s'" % name,
13253                                errors.ECODE_INVAL)